{
  "architectures": [
    "ViTPoseForPoseEstimation"
  ],
  "depth": 12,
  "drop_path_rate": 0.1,
  "dropout_p": 0.0,
  "embed_dim": 384,
  "flip_test": false,
  "img_size": [
    256,
    192
  ],
  "initializer_range": 1,
  "kernel": 3,
  "keypoint_in_channels": 384,
  "keypoint_num_deconv_filters": [
    256,
    256
  ],
  "keypoint_num_deconv_kernels": [
    4,
    4
  ],
  "keypoint_num_deconv_layer": 2,
  "mlp_ratio": 4,
  "model_type": "vitpose",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_joints": 17,
  "num_output_channels": 17,
  "patch_size": 16,
  "qkv_bias": true,
  "ratio": 1,
  "target_type": "GaussianHeatMap",
  "torch_dtype": "float32",
  "transformers_version": "4.32.0.dev0",
  "udp": true,
  "use_checkpoint": false
}