# Built from megadetector section from https://huggingface.co/spaces/hlydecker/MegaDetector_v5
# Built from https://huggingface.co/spaces/sofmi/MegaDetector_DLClive/
# Built from https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels/
# %%
import gradio as gr
import torch
import torchvision  # required at runtime by the yolov5 hub model
from dlclive import DLCLive, Processor
from dlcmodel.models import DownloadModel
from PIL import Image, ImageColor, ImageFont, ImageDraw
import matplotlib
import numpy as np
import math
from statistics import mean #vsccchange
import json
import yaml
from save_results import save_results
#########################################
# %%
# Input params
FONTS = {'amiko': "font/Amiko-Regular.ttf",
         'nature': "font/LoveNature.otf",
         'painter': "font/PainterDecorator.otf",
         'animals': "font/UncialAnimals.ttf",
         'zen': "font/ZEN.TTF"}
DLCFOLDERS = {'full_cat': "dlcmodel/DLC_Cat/",
              'full_dog': "dlcmodel/DLC_Dog/",
              'full_cheetah': "dlcmodel/DLC_Cheetah/",
              'full_human': "dlcmodel/DLC_human_dancing/",
              'full_macaque': "dlcmodel/models/DLC_monkey/",
              'quadruped': "dlcmodel/DLC_ma_superquadruped_resnet_50_iteration-0_shuffle-1/"}
DLCMODELS = {'full_cat': "dlcmodel/DLC_Cat_resnet_50_iteration-0_shuffle-0",
             'full_dog': "dlcmodel/DLC_Dog_resnet_50_iteration-0_shuffle-0",
             'full_cheetah': "dlcmodel/DLC_Cheetah_resnet_152_iteration-27_shuffle-1",
             'full_human': "dlcmodel/DLC_human_dancing_resnet_101_iteration-0_shuffle-1",
             'full_macaque': "dlcmodel/models/DLC_monkey_resnet_50_iteration-0_shuffle-1",
             'quadruped': "dlcmodel/DLC_ma_superquadruped_resnet_50_iteration-0_shuffle-1"}
Megadet_Models = {'md_v5a': "megadet_model/md_v5a.0.0.pt",
                  'md_v5b': "megadet_model/md_v5b.0.0.pt"}
DLC_models_list = ['full_cat', 'full_cheetah', 'full_dog', 'full_human', 'full_macaque', 'quadruped']
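# NOTE: DLCFOLDERS points at the exported DLC-live model folders (each is
# expected to contain a pose_cfg.yaml); DLCMODELS lists the same models
# without the trailing slash and is currently not referenced elsewhere.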
#############################################
# %%
# User interface: inputs
# Input image
gr_image_input = gr.inputs.Image(type="pil", label="Input Image")
# Models
gr_dlc_model_input = gr.inputs.Dropdown(choices=list(DLC_models_list),
                                        default='full_cat',
                                        type='value',  # return the string of the selected choice
                                        label='Select DeepLabCut model')
gr_mega_model_input = gr.inputs.Dropdown(choices=list(Megadet_Models.keys()),
                                         default='md_v5a',
                                         type='value',  # return the string of the selected choice
                                         label='Select MegaDetector model')
# Other inputs
gr_dlc_only_checkbox = gr.inputs.Checkbox(False,
                                          label='Run DLClive only, directly on input image') #vscchange no question mark
gr_str_labels_checkbox = gr.inputs.Checkbox(True,
                                            label='Show bodypart labels') #vscchange no question mark
gr_slider_conf_bboxes = gr.inputs.Slider(0, 1, .02, 0.8,
                                         label='Set confidence threshold for animal detections')
gr_slider_conf_keypoints = gr.inputs.Slider(0, 1, .05, 0,
                                            label='Set confidence threshold for keypoints')
# Data viz
gr_keypt_color = gr.ColorPicker(value="#ff0000", label="choose color for keypoint label")
gr_labels_font_style = gr.inputs.Dropdown(choices=['amiko', 'animals', 'nature', 'painter', 'zen'], #vscchange alphabetical order
                                          default='amiko',
                                          type='value',
                                          label='Select keypoint label font')
gr_slider_font_size = gr.inputs.Slider(5, 30, 1, 8,
                                       label='Set font size (pt)') #vscchange using units
gr_slider_marker_size = gr.inputs.Slider(1, 5, 0.2, 2, #vsccchange - change marker scales
                                         label='Set marker size (pixel)') #vscchange using units
gr_mega_bb_color = gr.ColorPicker(value="#ff0000", label="choose color for megadetector bounding box") #vscchange
gr_mega_bb_width = gr.inputs.Slider(1, 20, 1, 5,
                                    label='Set width of megadetector bounding box') #vscchange
# list of inputs
inputs = [gr_image_input,
          gr_mega_model_input,
          gr_dlc_model_input,
          gr_dlc_only_checkbox,
          gr_str_labels_checkbox,
          gr_slider_conf_bboxes,
          gr_slider_conf_keypoints,
          gr_labels_font_style,
          gr_slider_font_size,
          gr_keypt_color,
          gr_slider_marker_size,
          gr_mega_bb_color, #vscchange
          gr_mega_bb_width, #vscchange
          ]
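# NOTE: the order of `inputs` must match the positional parameters of
# predict_pipeline below, since gr.Interface passes them positionally.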
#########################################
# %%
# Draw keypoints on image
def draw_keypoints_on_image(image,
                            keypoints,
                            map_label_id_to_str,
                            flag_show_str_labels,
                            use_normalized_coordinates=True,
                            font_style='amiko',
                            font_size=8,
                            keypt_color="#ff0000",
                            marker_size=2,
                            ):
    """Draws keypoints on an image.
    Modified from:
    https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py
    Args:
        image: a PIL.Image object.
        keypoints: a numpy array with shape [num_keypoints, 3] (x, y, likelihood).
        map_label_id_to_str: dict with keys=label number and values=label string.
        flag_show_str_labels: boolean to select whether or not to show string labels.
        use_normalized_coordinates: if True (default), treat keypoint values as
            relative to the image. Otherwise treat them as absolute (pixels).
        font_style, font_size: font used for the string labels.
        keypt_color: color to draw the keypoint labels with. Default is red.
        marker_size: keypoint radius in pixels. Default is 2.
    """
    # get a drawing context
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    keypoints_x = [k[0] for k in keypoints]
    keypoints_y = [k[1] for k in keypoints]
    alpha = [k[2] for k in keypoints]  # per-keypoint likelihood
    # adjust keypoints coords if required
    if use_normalized_coordinates:
        keypoints_x = tuple([im_width * x for x in keypoints_x])
        keypoints_y = tuple([im_height * y for y in keypoints_y])
    cmap = matplotlib.cm.get_cmap('hsv')     # fill: one distinct hue per keypoint
    cmap2 = matplotlib.cm.get_cmap('Greys')  # outline: darker = higher likelihood
    if flag_show_str_labels:
        font = ImageFont.truetype(FONTS[font_style], font_size)
    # draw ellipses around keypoints
    for i, (keypoint_x, keypoint_y) in enumerate(zip(keypoints_x, keypoints_y)):
        # sample the colormaps and scale to [0, 255] RGB
        round_fill = [round(num * 255) for num in list(cmap(i * 10))[:3]]
        round_outline = [round(num * 255) for num in list(cmap2(alpha[i]))[:3]]
        draw.ellipse([(keypoint_x - marker_size, keypoint_y - marker_size),
                      (keypoint_x + marker_size, keypoint_y + marker_size)],
                     fill=tuple(round_fill), outline=tuple(round_outline), width=2)
        # add string labels next to keypoints
        if flag_show_str_labels:
            draw.text((keypoint_x + marker_size, keypoint_y + marker_size),
                      map_label_id_to_str[i],
                      ImageColor.getcolor(keypt_color, "RGB"),
                      font=font)
############################################
# %%
# Predict detections with the selected MegaDetector model
def predict_md(im,
               mega_model_input,
               size=640):
    # resize image so its longest side equals `size`
    g = (size / max(im.size))  # scale factor
    im = im.resize(tuple(int(x * g) for x in im.size),
                   Image.ANTIALIAS)
    MD_model = torch.hub.load('ultralytics/yolov5', 'custom', Megadet_Models[mega_model_input])
    ## detect objects
    results = MD_model(im)  # inference; results exposes 'ims', 'pred', 'names', 'xyxy', ...
    return results
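# torch.hub.load above re-loads the checkpoint on every request. A minimal
# memoization sketch (an assumption, not part of the original app; it reuses
# the Megadet_Models dict defined above):
_MD_MODEL_CACHE = {}

def load_megadetector(model_key):
    """Load a MegaDetector checkpoint once and reuse it on later calls."""
    if model_key not in _MD_MODEL_CACHE:
        _MD_MODEL_CACHE[model_key] = torch.hub.load('ultralytics/yolov5',
                                                    'custom',
                                                    Megadet_Models[model_key])
    return _MD_MODEL_CACHE[model_key]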
##########################################
# %%
def crop_animal_detections(img_in,
                           yolo_results,
                           likelihood_th):
    ## Extract animal crops
    list_labels_as_str = [i for i in yolo_results.names.values()]  # ['animal', 'person', 'vehicle']
    list_np_animal_crops = []
    # resize image to the scale megadetector ran at
    img_in = img_in.resize((yolo_results.ims[0].shape[1],
                            yolo_results.ims[0].shape[0]))
    # for every image in the batch
    for det_array in yolo_results.xyxy:
        # for every detection in the image
        for j in range(det_array.shape[0]):
            # bbox coords rounded outwards to the nearest integer (for pasting later)
            xmin_rd = int(math.floor(det_array[j, 0]))
            ymin_rd = int(math.floor(det_array[j, 1]))
            xmax_rd = int(math.ceil(det_array[j, 2]))
            ymax_rd = int(math.ceil(det_array[j, 3]))
            pred_llk = det_array[j, 4]
            pred_label = det_array[j, 5]
            # keep animal crops above threshold
            if (pred_label == list_labels_as_str.index('animal')) and \
               (pred_llk >= likelihood_th):
                area = (xmin_rd, ymin_rd, xmax_rd, ymax_rd)
                crop = img_in.crop(area)
                crop_np = np.asarray(crop)
                # add to list
                list_np_animal_crops.append(crop_np)
    return list_np_animal_crops
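# Each row of yolo_results.xyxy[i] is one detection:
# [xmin, ymin, xmax, ymax, confidence, class], where class indexes into the
# names list above (for these MegaDetector weights: 0=animal, 1=person, 2=vehicle).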
#########################################
# %%
def draw_rectangle_text(img, results, font_style='amiko', font_size=8, keypt_color="white",
                        mega_bb_color='red', mega_bb_width=5): #vsccchange
    # results is one row of md_results.xyxy: [xmin, ymin, xmax, ymax, confidence, class]
    bbxyxy = results
    xmax, ymax = bbxyxy[2], bbxyxy[3]
    shape = [(bbxyxy[0], bbxyxy[1]), (xmax, ymax)]
    imgR = ImageDraw.Draw(img)
    imgR.rectangle(shape, outline=mega_bb_color, width=int(mega_bb_width))  # bb for animal #vscchange
    confidence = bbxyxy[4]
    string_bb = 'animal ' + str(round(confidence, 2))
    font = ImageFont.truetype(FONTS[font_style], font_size)
    text_size = font.getsize(string_bb)  # (width, height)
    position = (bbxyxy[0], bbxyxy[1] - text_size[1] - 2)
    left, top, right, bottom = imgR.textbbox(position, string_bb, font=font)
    # filled background behind the confidence label
    imgR.rectangle((left, top - mega_bb_width, right + mega_bb_width, bottom + mega_bb_width),
                   fill=mega_bb_color) #vscchange, instead of red by default
    imgR.text((bbxyxy[0] + 3, bbxyxy[1] - text_size[1] - 2), string_bb,
              font=font, fill=keypt_color) #vscchange, instead of black by default
    return imgR
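# draw_rectangle_text draws in place on `img` via ImageDraw; callers can
# safely ignore the returned drawing handle.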
##########################################
# %%
def predict_dlc(list_np_crops,
                kpts_likelihood_th,
                DLCmodel,
                dlc_proc):
    # run dlc through the list of crops
    dlc_live = DLCLive(DLCmodel, processor=dlc_proc)
    dlc_live.init_inference(list_np_crops[0])
    list_kpts_per_crop = []
    for crop in list_np_crops:
        # get pose: one (x, y, likelihood) row per keypoint
        keypts_xyp = dlc_live.get_pose(crop)
        # set keypoints below threshold to nan
        keypts_xyp[keypts_xyp[:, -1] < kpts_likelihood_th, :] = np.nan
        # add kpts of this crop to list
        list_kpts_per_crop.append(keypts_xyp)
    return list_kpts_per_crop
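# DLCLive.get_pose returns a (num_keypoints, 3) array of (x, y, likelihood)
# in the pixel coordinates of the crop it was given; init_inference must be
# called once on a representative frame before get_pose.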
####################################################
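# Expected layout of the JSON written by save_results, inferred from the
# reads in social() below (an assumption, not a documented schema):
# {"number_of_bb": 2,
#  "bb_0": {"dlc_pred": {"nose": [x, y, llk], "L_Eye": [x, y, llk], ...}},
#  "bb_1": {...}}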
def social(output_file): #vsccchange
    ###############################
    # extract information from the json file
    with open(output_file) as f:
        json_data = json.load(f)
    # see how many bounding boxes there are
    print("number_of_bb:", json_data["number_of_bb"])
    # collect the key of each bounding box
    bb_list = []
    for num in range(json_data["number_of_bb"]):
        bb_list.append("bb_" + str(num))
    print("bb_list:", bb_list)
    # for each bounding box, print its DLC predictions
    for bb_num in bb_list:
        print(json_data[bb_num]["dlc_pred"])
    ################################
    # reference length: nose-to-eye distance of one animal.
    # use the first bounding box that has both eyes and a nose.
    eye_names = ['L_Eye', 'R_Eye', 'r_eye', 'l_eye', 'forehead', 'left_eye', 'right_eye']  # eye labels across the animal models
    eye_list = []
    nose_names = ['Nose', 'nose', 'chin']  # nose labels across the animal models
    nose_list = []
    # if eyes are in the dlc pred for one bb, use that bb,
    # and take the nose of the same animal (second for loop)
    for bb_num in bb_list:
        bb_cont = json_data[bb_num]
        bb_body = bb_cont["dlc_pred"]
        for bodypart in bb_body:
            if bodypart in eye_names:
                print(bodypart, bb_body[bodypart])
                eye_list.append(bb_body[bodypart])
        for bodypart in bb_body:
            if bodypart in nose_names:
                print(bodypart, bb_body[bodypart])
                nose_list.append(bb_body[bodypart])
        if eye_list and nose_list:
            break
    else:
        print("no eyes & nose present")
    print("eye_list:", eye_list)
    print("nose_list:", nose_list)
    # keep only x & y from each pose (drop the likelihood);
    # nan keypoints (below the DLC threshold) fail the comparison and are dropped
    eye_list_coord = []
    for pose in eye_list:
        fl_pose = [float(i) for i in pose]  # cast all values to float
        if fl_pose[2] < 1.0:  # pose-likelihood threshold; should be ~0.06, kept loose for demonstration
            eye_list_coord.append(fl_pose[:2])
    print("eye_list_coord:", eye_list_coord)
    nose_list_coord = []
    for pose in nose_list:
        fl_pose = [float(i) for i in pose]
        if fl_pose[2] < 1.0:
            nose_list_coord.append(fl_pose[:2])
    print("nose_list_coord:", nose_list_coord)
    # reference scale: mean distance from the nose to each eye (or one eye)
    nose_coord = max(nose_list_coord)  # pick one nose coordinate (max is arbitrary when several remain)
    nose2eye_dist = []
    for eye in eye_list_coord:
        nose2eye_dist.append(math.dist(eye, nose_coord))
    print("nose2eye_dist:", nose2eye_dist)
    ref = mean(nose2eye_dist)
    print("ref:", ref)
    ################################
    # if there are 2 or more bounding boxes with animals present, check
    # whether any keypoint of one animal is in the vicinity of a keypoint
    # of another
    # save each bounding box's predictions in a dict
    bb_dlc = {}
    for bb_num in bb_list:
        bb_dlc[str(bb_num)] = json_data[bb_num]["dlc_pred"]
    print("bb_dlc:", bb_dlc)
    # keep only the coordinates; bodypart names are not needed here
    bb_dlc_xy = {}
    for bb_num in bb_dlc:
        bb_body = bb_dlc[bb_num]
        bb_dlc_xy[bb_num] = []
        for body in bb_body:
            bodyval_xy = list(np.delete(bb_body[body], obj=2))  # drop the likelihood column
            bb_dlc_xy[bb_num].append(bodyval_xy)
    print("bb_dlc_xy:", bb_dlc_xy)
    ################################
    # euclidean distance between all pose pairs of two bounding boxes
    # (currently assumes exactly 2 boxes; see the sketch after this function
    # for a generalization to any number)
    int_matrx = []
    for x, y in bb_dlc_xy['bb_0']:
        bb0 = x, y
        for a, b in bb_dlc_xy['bb_1']:
            bb1 = a, b
            int_matrx.append(math.dist(bb0, bb1))
    print(int_matrx)
    # if any of the distances is below the reference, call it interacting
    if any(i < ref for i in int_matrx):
        phys_int = "physically interacting"
    else:
        phys_int = "not physically interacting"
    print(phys_int)
    return phys_int
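# A sketch generalizing the pairwise check in social() to any number of
# bounding boxes (an illustration under assumptions; the app currently
# assumes exactly two boxes, 'bb_0' and 'bb_1'):
def min_pairwise_distance(bb_dlc_xy):
    """Smallest keypoint-to-keypoint distance between any two animals.
    nan keypoints should be filtered out before calling this."""
    from itertools import combinations
    dists = [math.dist(p, q)
             for k1, k2 in combinations(bb_dlc_xy, 2)
             for p in bb_dlc_xy[k1]
             for q in bb_dlc_xy[k2]]
    return min(dists) if dists else float('inf')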
#####################################################
#####################################################
# %%
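# Full pipeline: MegaDetector finds animal bboxes -> crops are passed to
# DLC-live for pose estimation -> keypoints and bboxes are drawn back onto
# the image -> results are saved to JSON -> social() classifies interaction.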
def predict_pipeline(img_input,
                     mega_model_input,
                     dlc_model_input_str,
                     flag_dlc_only,
                     flag_show_str_labels,
                     bbox_likelihood_th,
                     kpts_likelihood_th,
                     font_style,
                     font_size,
                     keypt_color,
                     marker_size,
                     mega_bb_color, #vsccchange
                     mega_bb_width #vscchange
                     ):
    ############################################################
    ## Get DLC model folder and labels as strings
    # TODO: make a dict as for megadetector
    path_to_DLCmodel = DLCFOLDERS[dlc_model_input_str]  # DownloadModel(dlc_model_input_str, DLCFOLDERS[dlc_model_input_str])
    pose_cfg_path = DLCFOLDERS[dlc_model_input_str] + 'pose_cfg.yaml'
    # extract map of label ids to strings;
    # pose_cfg_dict['all_joints'] is a list of one-element lists
    with open(pose_cfg_path, "r") as stream:
        pose_cfg_dict = yaml.safe_load(stream)
    map_label_id_to_str = dict([(k, v) for k, v in zip([el[0] for el in pose_cfg_dict['all_joints']],
                                                       pose_cfg_dict['all_joints_names'])])
    ############################################################
    ### Run Megadetector
    md_results = predict_md(img_input,
                            mega_model_input,
                            size=640)
    ################################################################
    # Obtain animal crops for bboxes with confidence above threshold
    list_crops = crop_animal_detections(img_input,
                                        md_results,
                                        bbox_likelihood_th)
    ##############################################################
    # Run DLC
    dlc_proc = Processor()
    # if required: ignore MD crops and run DLC on full image [mostly for testing]
    if flag_dlc_only:
        # compute kpts on input img
        list_kpts_per_crop = predict_dlc([np.asarray(img_input)],
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)
        # draw kpts on input img
        draw_keypoints_on_image(img_input,
                                list_kpts_per_crop[0],  # a numpy array with shape [num_keypoints, 3]
                                map_label_id_to_str,
                                flag_show_str_labels,
                                use_normalized_coordinates=False,
                                font_style=font_style,
                                font_size=font_size,
                                keypt_color=keypt_color,
                                marker_size=marker_size)
        # no JSON download or behaviour classification in DLC-only mode
        return img_input, None, ""
    else:
        # Compute kpts for each crop
        list_kpts_per_crop = predict_dlc(list_crops,
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)
        img_background = img_input.resize((md_results.ims[0].shape[1], md_results.ims[0].shape[0]))
        print('I have ' + str(len(list_crops)) + ' bounding box(es)')
        for ic, (np_crop, kpts_crop) in enumerate(zip(list_crops,
                                                      list_kpts_per_crop)):
            ## Draw keypts on crop
            img_crop = Image.fromarray(np_crop)
            draw_keypoints_on_image(img_crop,
                                    kpts_crop,  # a numpy array with shape [num_keypoints, 3]
                                    map_label_id_to_str,
                                    flag_show_str_labels,
                                    use_normalized_coordinates=False,  # if True, use md_results.xyxyn for list_kpts_crop
                                    font_style=font_style,
                                    font_size=font_size,
                                    keypt_color=keypt_color,
                                    marker_size=marker_size)
            ## Paste crop in original image
            ## https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
            img_background.paste(img_crop, box=tuple([int(t) for t in md_results.xyxy[0][ic, :2]]))
            # draw bbox and confidence label for detections above threshold
            bb_per_animal = md_results.xyxy[0].tolist()[ic]
            pred = md_results.xyxy[0].tolist()[ic][4]
            if bbox_likelihood_th < pred:
                draw_rectangle_text(img_background, bb_per_animal, font_style=font_style, font_size=font_size,
                                    keypt_color=keypt_color, mega_bb_color=mega_bb_color, mega_bb_width=mega_bb_width) #vsccchange
        download_file = save_results(md_results, list_kpts_per_crop, map_label_id_to_str, bbox_likelihood_th)
        phys_int = social(download_file)
        return img_background, download_file, phys_int
####################################################
# %%
# User interface: outputs
gr_image_output = gr.outputs.Image(type="pil", label="Output Image")
out_smpl_npy_download = gr.File(label="Download JSON file")
gr_behaviomics_output = gr.Textbox(label = 'what behaviour is happening?') #vsccchange
outputs = [gr_image_output,out_smpl_npy_download, gr_behaviomics_output]
##############################################
# %%
# User interface: description
gr_title = "megadetdlc TRIALLING"
gr_description = "Contributed by Sofia Minano, Neslihan Wittek, Nirel Kadzo, VicShaoChih Chiang -- DLC AI Residents 2022.\
This app detects animals in camera trap images and estimates their pose using <a href='https://github.com/microsoft/CameraTraps'>MegaDetector v5</a> + <a href='https://github.com/DeepLabCut/DeepLabCut-live'>DeepLabCut-live</a>. \
We host models from the <a href='http://www.mackenziemathislab.org/dlc-modelzoo'>DeepLabCut ModelZoo Project</a>, and two <a href='https://github.com/microsoft/CameraTraps/blob/main/megadetector.md'>MegaDetector models</a>. Please carefully check their licensing information if you use this project. The app additionally builds upon work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a>, \
<a href='https://huggingface.co/spaces/sofmi/MegaDetector_DLClive'>sofmi/MegaDetector_DLClive</a> and \
<a href='https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels'>Neslihan/megadetector_dlcmodels</a>."
###########################################
# %%
demo = gr.Interface(predict_pipeline,
                    inputs=inputs,
                    outputs=outputs,
                    title=gr_title,
                    description=gr_description,
                    theme="huggingface",
                    #live=True
                    )
demo.launch(enable_queue=True,
            #share=True
            )
# %%