diff --git a/README.md b/README.md
index 2e3341b9..be8dddb9 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,9 @@
+A Google Colab notebook to test AlphaPose:
+https://colab.research.google.com/drive/1o9RhThxyxHr4P3n6a19UGmc5LTRn5zfp?usp=sharing
+
 ## News!
 - Dec 2019: [**v0.3.0** version](https://github.com/MVIG-SJTU/AlphaPose) of AlphaPose is released! Smaller model, higher accuracy!
diff --git a/alphapose/models/layers/dcn/src/deform_conv_cuda.cpp b/alphapose/models/layers/dcn/src/deform_conv_cuda.cpp
index a07426a0..ffe740db 100644
--- a/alphapose/models/layers/dcn/src/deform_conv_cuda.cpp
+++ b/alphapose/models/layers/dcn/src/deform_conv_cuda.cpp
@@ -63,26 +63,26 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
                  at::Tensor weight, int kH, int kW, int dH, int dW, int padH,
                  int padW, int dilationH, int dilationW, int group,
                  int deformable_group) {
-  AT_CHECK(weight.ndimension() == 4,
+  TORCH_CHECK(weight.ndimension() == 4,
            "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
            "but got: %s",
            weight.ndimension());
 
-  AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+  TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
 
-  AT_CHECK(kW > 0 && kH > 0,
+  TORCH_CHECK(kW > 0 && kH > 0,
            "kernel size should be greater than zero, but got kH: %d kW: %d",
            kH, kW);
 
-  AT_CHECK((weight.size(2) == kH && weight.size(3) == kW),
+  TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW),
            "kernel size should be consistent with weight, ",
            "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
            kW, weight.size(2), weight.size(3));
 
-  AT_CHECK(dW > 0 && dH > 0,
+  TORCH_CHECK(dW > 0 && dH > 0,
            "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
 
-  AT_CHECK(
+  TORCH_CHECK(
       dilationW > 0 && dilationH > 0,
       "dilation should be greater than 0, but got dilationH: %d dilationW: %d",
       dilationH, dilationW);
@@ -98,7 +98,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
     dimw++;
   }
 
-  AT_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
+  TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
            ndim);
 
   long nInputPlane = weight.size(1) * group;
@@ -110,7 +110,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
   long outputWidth =
       (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
 
-  AT_CHECK(nInputPlane % deformable_group == 0,
+  TORCH_CHECK(nInputPlane % deformable_group == 0,
            "input channels must divide deformable group size");
 
   if (outputWidth < 1 || outputHeight < 1)
@@ -120,27 +120,27 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
         nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, outputWidth);
 
-  AT_CHECK(input.size(1) == nInputPlane,
+  TORCH_CHECK(input.size(1) == nInputPlane,
            "invalid number of input planes, expected: %d, but got: %d",
            nInputPlane, input.size(1));
 
-  AT_CHECK((inputHeight >= kH && inputWidth >= kW),
+  TORCH_CHECK((inputHeight >= kH && inputWidth >= kW),
            "input image is smaller than kernel");
 
-  AT_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
+  TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
            "invalid spatial size of offset, expected height: %d width: %d, but "
            "got height: %d width: %d",
            outputHeight, outputWidth, offset.size(2), offset.size(3));
 
-  AT_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
+  TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
            "invalid number of channels of offset");
 
   if (gradOutput != NULL) {
-    AT_CHECK(gradOutput->size(dimf) == nOutputPlane,
+    TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane,
              "invalid number of gradOutput planes, expected: %d, but got: %d",
              nOutputPlane, gradOutput->size(dimf));
 
-    AT_CHECK((gradOutput->size(dimh) == outputHeight &&
+    TORCH_CHECK((gradOutput->size(dimh) == outputHeight &&
               gradOutput->size(dimw) == outputWidth),
              "invalid size of gradOutput, expected height: %d width: %d , but "
             "got height: %d width: %d",
@@ -191,7 +191,7 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
   long outputHeight =
       (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
 
-  AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
+  TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
 
   output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane,
                         outputHeight, outputWidth});
@@ -298,7 +298,7 @@ int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
   long outputHeight =
       (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
 
-  AT_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset");
+  TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
   gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth});
   columns = at::zeros(
       {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
@@ -414,7 +414,7 @@ int deform_conv_backward_parameters_cuda(
   long outputHeight =
       (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
 
-  AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
+  TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
 
   columns = at::zeros(
       {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
@@ -494,8 +494,8 @@ void modulated_deform_conv_cuda_forward(
     const int pad_h, const int pad_w, const int dilation_h,
     const int dilation_w, const int group, const int deformable_group,
     const bool with_bias) {
-  AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
-  AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+  TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+  TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
   at::DeviceGuard guard(input.device());
 
   const int batch = input.size(0);
@@ -576,8 +576,8 @@ void modulated_deform_conv_cuda_backward(
     int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
     int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
     const bool with_bias) {
-  AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
-  AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+  TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+  TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
   at::DeviceGuard guard(input.device());
 
   const int batch = input.size(0);
@@ -698,4 +698,4 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("modulated_deform_conv_cuda_backward",
         &modulated_deform_conv_cuda_backward,
         "modulated deform conv backward (CUDA)");
-}
\ No newline at end of file
+}
diff --git a/alphapose/models/layers/dcn/src/deform_conv_cuda_kernel.cu b/alphapose/models/layers/dcn/src/deform_conv_cuda_kernel.cu
index a2b94286..7a08c7e6 100644
--- a/alphapose/models/layers/dcn/src/deform_conv_cuda_kernel.cu
+++ b/alphapose/models/layers/dcn/src/deform_conv_cuda_kernel.cu
@@ -257,9 +257,9 @@ void deformable_im2col(
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       data_im.scalar_type(), "deformable_im2col_gpu", ([&] {
-        const scalar_t *data_im_ = data_im.data<scalar_t>();
-        const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-        scalar_t *data_col_ = data_col.data<scalar_t>();
+        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
 
         deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
             num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w,
@@ -351,9 +351,9 @@ void deformable_col2im(
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       data_col.scalar_type(), "deformable_col2im_gpu", ([&] {
-        const scalar_t *data_col_ = data_col.data<scalar_t>();
-        const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-        scalar_t *grad_im_ = grad_im.data<scalar_t>();
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();
 
         deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
             num_kernels, data_col_, data_offset_, channels, height, width, ksize_h,
@@ -449,10 +449,10 @@ void deformable_col2im_coord(
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] {
-        const scalar_t *data_col_ = data_col.data<scalar_t>();
-        const scalar_t *data_im_ = data_im.data<scalar_t>();
-        const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-        scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
 
         deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
             num_kernels, data_col_, data_im_, data_offset_, channels, height, width,
@@ -779,10 +779,10 @@ void modulated_deformable_im2col_cuda(
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] {
-        const scalar_t *data_im_ = data_im.data<scalar_t>();
-        const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-        const scalar_t *data_mask_ = data_mask.data<scalar_t>();
-        scalar_t *data_col_ = data_col.data<scalar_t>();
+        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+        scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
 
         modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
             num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w,
@@ -811,10 +811,10 @@ void modulated_deformable_col2im_cuda(
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] {
-        const scalar_t *data_col_ = data_col.data<scalar_t>();
-        const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-        const scalar_t *data_mask_ = data_mask.data<scalar_t>();
-        scalar_t *grad_im_ = grad_im.data<scalar_t>();
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+        scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();
 
         modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
             num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
@@ -844,12 +844,12 @@ void modulated_deformable_col2im_coord_cuda(
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] {
-        const scalar_t *data_col_ = data_col.data<scalar_t>();
-        const scalar_t *data_im_ = data_im.data<scalar_t>();
-        const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-        const scalar_t *data_mask_ = data_mask.data<scalar_t>();
-        scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
-        scalar_t *grad_mask_ = grad_mask.data<scalar_t>();
+        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+        scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
+        scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();
 
         modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
             num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
diff --git a/alphapose/models/layers/dcn/src/deform_pool_cuda.cpp b/alphapose/models/layers/dcn/src/deform_pool_cuda.cpp
index 9e0e3ffc..f6f087b8 100644
--- a/alphapose/models/layers/dcn/src/deform_pool_cuda.cpp
+++ b/alphapose/models/layers/dcn/src/deform_pool_cuda.cpp
@@ -33,7 +33,7 @@ void deform_psroi_pooling_cuda_forward(
     at::Tensor top_count, const int no_trans, const float spatial_scale,
     const int output_dim, const int group_size, const int pooled_size,
     const int part_size, const int sample_per_part, const float trans_std) {
-  AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+  TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
   at::DeviceGuard guard(input.device());
 
   const int batch = input.size(0);
@@ -59,8 +59,8 @@ void deform_psroi_pooling_cuda_backward(
     const int no_trans, const float spatial_scale, const int output_dim,
     const int group_size, const int pooled_size, const int part_size,
     const int sample_per_part, const float trans_std) {
-  AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
-  AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+  TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
+  TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
   at::DeviceGuard guard(input.device());
 
   const int batch = input.size(0);
diff --git a/alphapose/models/layers/dcn/src/deform_pool_cuda_kernel.cu b/alphapose/models/layers/dcn/src/deform_pool_cuda_kernel.cu
index 1922d724..b0aa5165 100644
--- a/alphapose/models/layers/dcn/src/deform_pool_cuda_kernel.cu
+++ b/alphapose/models/layers/dcn/src/deform_pool_cuda_kernel.cu
@@ -290,11 +290,11 @@ void DeformablePSROIPoolForward(const at::Tensor data,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       data.scalar_type(), "deformable_psroi_pool_forward", ([&] {
-        const scalar_t *bottom_data = data.data<scalar_t>();
-        const scalar_t *bottom_rois = bbox.data<scalar_t>();
-        const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
-        scalar_t *top_data = out.data<scalar_t>();
-        scalar_t *top_count_data = top_count.data<scalar_t>();
+        const scalar_t *bottom_data = data.data_ptr<scalar_t>();
+        const scalar_t *bottom_rois = bbox.data_ptr<scalar_t>();
+        const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr<scalar_t>();
+        scalar_t *top_data = out.data_ptr<scalar_t>();
+        scalar_t *top_count_data = top_count.data_ptr<scalar_t>();
 
         DeformablePSROIPoolForwardKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
             count, bottom_data, (scalar_t)spatial_scale, channels, height, width, pooled_height, pooled_width,
@@ -341,13 +341,13 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       out_grad.scalar_type(), "deformable_psroi_pool_backward_acc", ([&] {
-        const scalar_t *top_diff = out_grad.data<scalar_t>();
-        const scalar_t *bottom_data = data.data<scalar_t>();
-        const scalar_t *bottom_rois = bbox.data<scalar_t>();
-        const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
-        scalar_t *bottom_data_diff = in_grad.data<scalar_t>();
-        scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data<scalar_t>();
-        const scalar_t *top_count_data = top_count.data<scalar_t>();
+        const scalar_t *top_diff = out_grad.data_ptr<scalar_t>();
+        const scalar_t *bottom_data = data.data_ptr<scalar_t>();
+        const scalar_t *bottom_rois = bbox.data_ptr<scalar_t>();
+        const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr<scalar_t>();
+        scalar_t *bottom_data_diff = in_grad.data_ptr<scalar_t>();
+        scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data_ptr<scalar_t>();
+        const scalar_t *top_count_data = top_count.data_ptr<scalar_t>();
 
         DeformablePSROIPoolBackwardAccKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
             count, top_diff, top_count_data, num_rois, (scalar_t)spatial_scale, channels, height, width,
@@ -361,4 +361,4 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
   {
     printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err));
   }
-}
\ No newline at end of file
+}
diff --git a/alphapose/utils/roi_align/src/roi_align_cuda.cpp b/alphapose/utils/roi_align/src/roi_align_cuda.cpp
index 4e608161..eaa5d2de 100644
--- a/alphapose/utils/roi_align/src/roi_align_cuda.cpp
+++ b/alphapose/utils/roi_align/src/roi_align_cuda.cpp
@@ -17,9 +17,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                             const int pooled_height, const int pooled_width,
                             at::Tensor bottom_grad);
 
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
 #define CHECK_CONTIGUOUS(x) \
-  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+  TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
 #define CHECK_INPUT(x) \
   CHECK_CUDA(x);       \
   CHECK_CONTIGUOUS(x)
@@ -82,4 +82,4 @@ int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)");
   m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)");
-}
\ No newline at end of file
+}
diff --git a/alphapose/utils/roi_align/src/roi_align_kernel.cu b/alphapose/utils/roi_align/src/roi_align_kernel.cu
index b914fd1e..c34cec21 100644
--- a/alphapose/utils/roi_align/src/roi_align_kernel.cu
+++ b/alphapose/utils/roi_align/src/roi_align_kernel.cu
@@ -132,9 +132,9 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
   const int output_size = num_rois * pooled_height * pooled_width * channels;
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       features.scalar_type(), "ROIAlignLaucherForward", ([&] {
-        const scalar_t *bottom_data = features.data<scalar_t>();
-        const scalar_t *rois_data = rois.data<scalar_t>();
-        scalar_t *top_data = output.data<scalar_t>();
+        const scalar_t *bottom_data = features.data_ptr<scalar_t>();
+        const scalar_t *rois_data = rois.data_ptr<scalar_t>();
+        scalar_t *top_data = output.data_ptr<scalar_t>();
 
         ROIAlignForward<scalar_t>
             <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
@@ -275,9 +275,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       top_grad.scalar_type(), "ROIAlignLaucherBackward", ([&] {
-        const scalar_t *top_diff = top_grad.data<scalar_t>();
-        const scalar_t *rois_data = rois.data<scalar_t>();
-        scalar_t *bottom_diff = bottom_grad.data<scalar_t>();
+        const scalar_t *top_diff = top_grad.data_ptr<scalar_t>();
+        const scalar_t *rois_data = rois.data_ptr<scalar_t>();
+        scalar_t *bottom_diff = bottom_grad.data_ptr<scalar_t>();
         if (sizeof(scalar_t) == sizeof(double)) {
           fprintf(stderr, "double is not supported\n");
           exit(-1);
@@ -291,4 +291,4 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
   }));
   THCudaCheck(cudaGetLastError());
   return 1;
-}
\ No newline at end of file
+}
diff --git a/detector/nms/src/nms_cpu.cpp b/detector/nms/src/nms_cpu.cpp
index f7cffb49..31e984e6 100644
--- a/detector/nms/src/nms_cpu.cpp
+++ b/detector/nms/src/nms_cpu.cpp
@@ -3,7 +3,7 @@
 template <typename scalar_t>
 at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
-  AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
+  AT_ASSERTM(!dets.device().is_cuda(), "dets must be a CPU tensor");
 
   if (dets.numel() == 0) {
     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
@@ -23,13 +23,13 @@ at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
   at::Tensor suppressed_t =
       at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
 
-  auto suppressed = suppressed_t.data<uint8_t>();
-  auto order = order_t.data<int64_t>();
-  auto x1 = x1_t.data<scalar_t>();
-  auto y1 = y1_t.data<scalar_t>();
-  auto x2 = x2_t.data<scalar_t>();
-  auto y2 = y2_t.data<scalar_t>();
-  auto areas = areas_t.data<scalar_t>();
+  auto suppressed = suppressed_t.data_ptr<uint8_t>();
+  auto order = order_t.data_ptr<int64_t>();
+  auto x1 = x1_t.data_ptr<scalar_t>();
+  auto y1 = y1_t.data_ptr<scalar_t>();
+  auto x2 = x2_t.data_ptr<scalar_t>();
+  auto y2 = y2_t.data_ptr<scalar_t>();
+  auto areas = areas_t.data_ptr<scalar_t>();
 
   for (int64_t _i = 0; _i < ndets; _i++) {
     auto i = order[_i];
@@ -68,4 +68,4 @@ at::Tensor nms(const at::Tensor& dets, const float threshold) {
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("nms", &nms, "non-maximum suppression");
-}
\ No newline at end of file
+}
diff --git a/detector/nms/src/nms_cuda.cpp b/detector/nms/src/nms_cuda.cpp
index 0ea6f9b5..8bb650e3 100644
--- a/detector/nms/src/nms_cuda.cpp
+++ b/detector/nms/src/nms_cuda.cpp
@@ -1,7 +1,7 @@
 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 #include <torch/extension.h>
 
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
 
 at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
 
@@ -14,4 +14,4 @@ at::Tensor nms(const at::Tensor& dets, const float threshold) {
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("nms", &nms, "non-maximum suppression");
-}
\ No newline at end of file
+}
diff --git a/detector/nms/src/nms_kernel.cu b/detector/nms/src/nms_kernel.cu
index 9254f2ad..166edecd 100644
--- a/detector/nms/src/nms_kernel.cu
+++ b/detector/nms/src/nms_kernel.cu
@@ -69,7 +69,7 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
 
 // boxes is a N x 5 tensor
 at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
   using scalar_t = float;
-  AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
+  AT_ASSERTM(boxes.is_cuda(), "boxes must be a CUDA tensor");
   auto scores = boxes.select(1, 4);
   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
   auto boxes_sorted = boxes.index_select(0, order_t);
@@ -78,7 +78,7 @@ at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
 
   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
 
-  scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
+  scalar_t* boxes_dev = boxes_sorted.data_ptr<scalar_t>();
 
   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
 
@@ -106,7 +106,7 @@ at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
 
   at::Tensor keep =
       at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
-  int64_t* keep_out = keep.data<int64_t>();
+  int64_t* keep_out = keep.data_ptr<int64_t>();
   int num_to_keep = 0;
   for (int i = 0; i < boxes_num; i++) {
@@ -128,4 +128,4 @@ at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
       keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
           order_t.device(), keep.scalar_type()) }).sort(0, false));
-}
\ No newline at end of file
+}
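
Reviewer note on the migration pattern: every hunk above applies one of three mechanical API updates needed to compile these C++/CUDA extensions against recent PyTorch (the AT_CHECK macro, for instance, was removed in PyTorch 1.5): AT_CHECK becomes TORCH_CHECK, the typed accessor at::Tensor::data<T>() becomes at::Tensor::data_ptr<T>(), and type()-based device queries become direct device checks. A minimal before/after sketch, using a hypothetical tensor t (illustration only, not part of the patch):

    // Hypothetical tensor t, for illustration only.
    // Old style (pre-1.0 ATen; no longer compiles on recent PyTorch):
    AT_CHECK(t.is_contiguous(), "t must be contiguous");
    float *p = t.data<float>();
    bool on_gpu = t.type().is_cuda();

    // New style:
    TORCH_CHECK(t.is_contiguous(), "t must be contiguous");
    float *q = t.data_ptr<float>();
    bool on_gpu2 = t.is_cuda();  // equivalently: t.device().is_cuda()

Two details worth noting: TORCH_CHECK streams its trailing arguments onto the error message rather than doing printf-style substitution, so the %d/%s placeholders inherited from the old messages print literally (they are kept here to keep the diff minimal); and the deform_conv_backward_input_cuda hunk drops a stray `3` left over from the THArgCheck argument-index convention, since TORCH_CHECK takes only a condition followed by message parts.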
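
To sanity-check the new accessors against libtorch before rebuilding the full extensions, a self-contained program along these lines should compile and run (the file name smoke_test.cpp and the CPU-only setup are assumptions for the sketch, not part of the patch):

    // smoke_test.cpp -- minimal sketch exercising the post-migration API.
    #include <torch/torch.h>
    #include <iostream>

    int main() {
      // 2x3 float tensor: [[0, 1, 2], [3, 4, 5]]
      at::Tensor t = torch::arange(6, torch::kFloat).reshape({2, 3});
      TORCH_CHECK(t.is_contiguous(), "t must be contiguous");
      TORCH_CHECK(!t.is_cuda(), "this sketch stays on the CPU");
      const float *p = t.data_ptr<float>();  // typed pointer; replaces t.data<float>()
      std::cout << p[0] << " " << p[5] << std::endl;  // prints "0 5"
      return 0;
    }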