#include <opencv2/opencv.hpp> |
#include <openpose/flags.hpp> |
#include <openpose/headers.hpp> |
#ifdef USE_CAFFE |
#include <caffe/blob.hpp> |
#endif |
#ifdef USE_OPENCL |
#include <openpose_private/gpu/opencl.hcl> |
#include <openpose_private/gpu/cl2.hpp> |
DEFINE_string(image_path, "examples/media/COCO_val2014_000000000192.jpg", "Process the desired image."); |
typedef cl::KernelFunctor<cl::Buffer, int, int, float> ScaleFunctor; |
// OpenCL C source of `scaleKernel`, stringified by MULTI_LINE_STRING.
// The kernel multiplies, in place, the element at (x, y) of channel c by `scale`, where x, y and c are the
// global ids of dimensions 0, 1 and 2 and the buffer is addressed as c*targetWidth*targetHeight + y*targetWidth + x.
// Work-items whose x or y falls outside the target plane do nothing, so a global work size rounded up past
// targetWidth/targetHeight cannot write out of bounds. The channel index is not checked because the kernel
// receives no channel count.
const std::string scaleKernelString = MULTI_LINE_STRING(
    __kernel void scaleKernel(__global float* targetPtr, const int targetWidth, const int targetHeight,
                              const float scale)
    {
        const int x = get_global_id(0);
        const int y = get_global_id(1);
        const int c = get_global_id(2);

        if (x < targetWidth && y < targetHeight)
        {
            __global float* targetPtrC = &targetPtr[c*targetWidth*targetHeight];
            targetPtrC[y*targetWidth+x] *= scale;
        }
    }
);
int clTest() |
{ |
try |
{ |
cv::Mat img = cv::imread(FLAGS_image_path); |
if(img.empty()) |
op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__); |
cv::Mat imgResize; cv::resize(img, imgResize, cv::Size(368,368)); |
cv::Mat imgFloat; imgResize.convertTo(imgFloat, CV_32FC3); |
imgFloat /= 255.; |
int imageVolume = imgFloat.size().width * imgFloat.size().height * imgFloat.channels(); |
std::cout << imgFloat.channels() << std::endl; |
caffe::Caffe::set_mode(caffe::Caffe::GPU); |
std::vector<int> devices; |
const int maxNumberGpu = op::OpenCL::getTotalGPU(); |
for (auto i = 0; i < maxNumberGpu; i++){ |
devices.emplace_back(i); |
std::cout << i << std::endl; |
} |
caffe::Caffe::SetDevices(devices); |
std::unique_ptr<caffe::Net<float>> upCaffeNet; |
caffe::Caffe::set_mode(caffe::Caffe::GPU); |
caffe::Caffe::SelectDevice(0, true); |
upCaffeNet.reset(new caffe::Net<float>{ |
"models/pose/coco/pose_deploy_linevec.prototxt", caffe::TEST, caffe::Caffe::GetDefaultDevice()}); |
upCaffeNet->CopyTrainedLayersFrom("models/pose/coco/pose_iter_440000.caffemodel"); |
op::OpenCL::getInstance(0, CL_DEVICE_TYPE_GPU, true); |
upCaffeNet->blobs()[0]->Reshape({1,imgFloat.channels(),imgResize.size().width,imgResize.size().height}); |
upCaffeNet->Reshape(); |
caffe::BlobProto blob_proto; |
blob_proto.set_channels(3); |
blob_proto.set_height(imgResize.size().width); |
blob_proto.set_width(imgResize.size().height); |
blob_proto.clear_data(); |
for (int c = 0; c < 3; ++c) |
for (int h = 0; h < imgResize.size().height; ++h) |
for (int w = 0; w < imgResize.size().width; ++w) |
blob_proto.add_data(imgResize.at<cv::Vec3f>(h, w)[c]); |
blob_proto.set_num(1); |
caffe::Blob<float>* input_layer = upCaffeNet->input_blobs()[0]; |
input_layer->FromProto(blob_proto); |
upCaffeNet->Forward(0); |
boost::shared_ptr<caffe::Blob<float>> output_blob = upCaffeNet->blob_by_name("net_output"); |
cl::Device& device = op::OpenCL::getInstance(0)->getDevice(); |
cl_uint mem_align; |
clGetDeviceInfo(device.get(), CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(mem_align), &mem_align, nullptr); |
std::cout << "Alignment in bits of the base address : " << mem_align << std::endl; |
cv::Mat finalImage = imgFloat; |
try{ |
float* gpuPtr = output_blob->mutable_gpu_data(); |
cl::Buffer outputBuffer((cl_mem)gpuPtr, true); |
float* heatmaps = new float[output_blob->shape()[1] * output_blob->shape()[2] * output_blob->shape()[3]]; |
op::OpenCL::getInstance(0)->getQueue().enqueueReadBuffer( |
outputBuffer, CL_TRUE, 0, |
output_blob->shape()[1] * output_blob->shape()[2] * output_blob->shape()[3] * sizeof(float), heatmaps); |
int heatmapChannels = output_blob->shape()[1]; |
int shape = output_blob->shape()[2] * output_blob->shape()[3]; |
for(int i=0; i<heatmapChannels; i++){ |
cv::Mat hm(cv::Size(output_blob->shape()[2], output_blob->shape()[3]), CV_32FC1); |
cl_buffer_region sourceRegion; |
op::OpenCL::getBufferRegion<float>(sourceRegion, i * shape, shape); |
cl::Buffer regionBuffer = outputBuffer.createSubBuffer(CL_MEM_READ_WRITE, |
&sourceRegion); |
} |
} |
#if defined(USE_OPENCL) && defined(CL_HPP_ENABLE_EXCEPTIONS) |
catch (const cl::Error& e) |
{ |
op::error(std::string(e.what()) + " : " + op::OpenCL::clErrorToString(e.err()) + " ID: " + |
std::to_string(0), __LINE__, __FUNCTION__, __FILE__); |
} |
#endif |
catch (const std::exception& e) |
{ |
op::error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
} |
cv::imshow("win", finalImage); |
cv::waitKey(0); |
return 0; |
} |
catch (const std::exception& e) |
{ |
op::error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
return -1; |
} |
} |
#endif |
int main() |
{ |
#ifdef USE_OPENCL |
gflags::ParseCommandLineFlags(&argc, &argv, true); |
std::thread t(&clTest); |
t.join(); |
return 0; |
#else |
op::error("OpenPose must be compiled with the `USE_CAFFE` & `USE_OPENCL` macro definitions in order to run" |
" this functionality.", __LINE__, __FUNCTION__, __FILE__); |
return -1; |
#endif |
} |