diff --git a/README.md b/README.md
index 2e3341b9..be8dddb9 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,9 @@
+A Google Colab notebook to test AlphaPose:
+https://colab.research.google.com/drive/1o9RhThxyxHr4P3n6a19UGmc5LTRn5zfp?usp=sharing
+
## News!
- Dec 2019: [**v0.3.0** version](https://github.com/MVIG-SJTU/AlphaPose) of AlphaPose is released! Smaller model, higher accuracy!
diff --git a/alphapose/models/layers/dcn/src/deform_conv_cuda.cpp b/alphapose/models/layers/dcn/src/deform_conv_cuda.cpp
index a07426a0..ffe740db 100644
--- a/alphapose/models/layers/dcn/src/deform_conv_cuda.cpp
+++ b/alphapose/models/layers/dcn/src/deform_conv_cuda.cpp
@@ -63,26 +63,26 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
at::Tensor weight, int kH, int kW, int dH, int dW, int padH,
int padW, int dilationH, int dilationW, int group,
int deformable_group) {
- AT_CHECK(weight.ndimension() == 4,
+ TORCH_CHECK(weight.ndimension() == 4,
"4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
"but got: %s",
weight.ndimension());
- AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+ TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
- AT_CHECK(kW > 0 && kH > 0,
+ TORCH_CHECK(kW > 0 && kH > 0,
"kernel size should be greater than zero, but got kH: %d kW: %d", kH,
kW);
- AT_CHECK((weight.size(2) == kH && weight.size(3) == kW),
+ TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW),
"kernel size should be consistent with weight, ",
"but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
kW, weight.size(2), weight.size(3));
- AT_CHECK(dW > 0 && dH > 0,
+ TORCH_CHECK(dW > 0 && dH > 0,
"stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
- AT_CHECK(
+ TORCH_CHECK(
dilationW > 0 && dilationH > 0,
"dilation should be greater than 0, but got dilationH: %d dilationW: %d",
dilationH, dilationW);
@@ -98,7 +98,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
dimw++;
}
- AT_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
+ TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
ndim);
long nInputPlane = weight.size(1) * group;
@@ -110,7 +110,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
long outputWidth =
(inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
- AT_CHECK(nInputPlane % deformable_group == 0,
+ TORCH_CHECK(nInputPlane % deformable_group == 0,
"input channels must divide deformable group size");
if (outputWidth < 1 || outputHeight < 1)
@@ -120,27 +120,27 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight,
outputWidth);
- AT_CHECK(input.size(1) == nInputPlane,
+ TORCH_CHECK(input.size(1) == nInputPlane,
"invalid number of input planes, expected: %d, but got: %d",
nInputPlane, input.size(1));
- AT_CHECK((inputHeight >= kH && inputWidth >= kW),
+ TORCH_CHECK((inputHeight >= kH && inputWidth >= kW),
"input image is smaller than kernel");
- AT_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
+ TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
"invalid spatial size of offset, expected height: %d width: %d, but "
"got height: %d width: %d",
outputHeight, outputWidth, offset.size(2), offset.size(3));
- AT_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
+ TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
"invalid number of channels of offset");
if (gradOutput != NULL) {
- AT_CHECK(gradOutput->size(dimf) == nOutputPlane,
+ TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane,
"invalid number of gradOutput planes, expected: %d, but got: %d",
nOutputPlane, gradOutput->size(dimf));
- AT_CHECK((gradOutput->size(dimh) == outputHeight &&
+ TORCH_CHECK((gradOutput->size(dimh) == outputHeight &&
gradOutput->size(dimw) == outputWidth),
"invalid size of gradOutput, expected height: %d width: %d , but "
"got height: %d width: %d",
@@ -191,7 +191,7 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
- AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
+ TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane,
outputHeight, outputWidth});
@@ -298,7 +298,7 @@ int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
- AT_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset");
+ TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth});
columns = at::zeros(
{nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
@@ -414,7 +414,7 @@ int deform_conv_backward_parameters_cuda(
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
- AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
+ TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
columns = at::zeros(
{nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
@@ -494,8 +494,8 @@ void modulated_deform_conv_cuda_forward(
const int pad_h, const int pad_w, const int dilation_h,
const int dilation_w, const int group, const int deformable_group,
const bool with_bias) {
- AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
- AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+ TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+ TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
at::DeviceGuard guard(input.device());
const int batch = input.size(0);
@@ -576,8 +576,8 @@ void modulated_deform_conv_cuda_backward(
int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
const bool with_bias) {
- AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
- AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+ TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+ TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
at::DeviceGuard guard(input.device());
const int batch = input.size(0);
@@ -698,4 +698,4 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("modulated_deform_conv_cuda_backward",
&modulated_deform_conv_cuda_backward,
"modulated deform conv backward (CUDA)");
-}
\ No newline at end of file
+}
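
AT_CHECK was removed in recent PyTorch releases (it became a hard error around 1.5), which is why these extensions stopped compiling; TORCH_CHECK is the drop-in replacement with the same condition-first, variadic-message signature. A minimal sketch of the idiom, assuming a standard PyTorch C++ extension build; check_weight below is illustrative only, not part of AlphaPose:

#include <torch/extension.h>

// Same shape-check idiom as deform_conv_cuda.cpp above, illustrative only.
void check_weight(const at::Tensor &weight, int kH, int kW) {
  // TORCH_CHECK(cond, msg...) replaces AT_CHECK one-for-one; the trailing
  // arguments are concatenated into the error message.
  TORCH_CHECK(weight.ndimension() == 4,
              "4D weight tensor expected, but got: ", weight.ndimension());
  TORCH_CHECK(weight.size(2) == kH && weight.size(3) == kW,
              "kernel size should be consistent with weight");
}
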
diff --git a/alphapose/models/layers/dcn/src/deform_conv_cuda_kernel.cu b/alphapose/models/layers/dcn/src/deform_conv_cuda_kernel.cu
index a2b94286..7a08c7e6 100644
--- a/alphapose/models/layers/dcn/src/deform_conv_cuda_kernel.cu
+++ b/alphapose/models/layers/dcn/src/deform_conv_cuda_kernel.cu
@@ -257,9 +257,9 @@ void deformable_im2col(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_im.scalar_type(), "deformable_im2col_gpu", ([&] {
- const scalar_t *data_im_ = data_im.data<scalar_t>();
- const scalar_t *data_offset_ = data_offset.data<scalar_t>();
- scalar_t *data_col_ = data_col.data<scalar_t>();
+ const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w,
@@ -351,9 +351,9 @@ void deformable_col2im(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "deformable_col2im_gpu", ([&] {
- const scalar_t *data_col_ = data_col.data<scalar_t>();
- const scalar_t *data_offset_ = data_offset.data<scalar_t>();
- scalar_t *grad_im_ = grad_im.data<scalar_t>();
+ const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();
deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_offset_, channels, height, width, ksize_h,
@@ -449,10 +449,10 @@ void deformable_col2im_coord(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] {
- const scalar_t *data_col_ = data_col.data<scalar_t>();
- const scalar_t *data_im_ = data_im.data<scalar_t>();
- const scalar_t *data_offset_ = data_offset.data<scalar_t>();
- scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
+ const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+ const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_im_, data_offset_, channels, height, width,
@@ -779,10 +779,10 @@ void modulated_deformable_im2col_cuda(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] {
- const scalar_t *data_im_ = data_im.data<scalar_t>();
- const scalar_t *data_offset_ = data_offset.data<scalar_t>();
- const scalar_t *data_mask_ = data_mask.data<scalar_t>();
- scalar_t *data_col_ = data_col.data<scalar_t>();
+ const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+ scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w,
@@ -811,10 +811,10 @@ void modulated_deformable_col2im_cuda(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] {
- const scalar_t *data_col_ = data_col.data<scalar_t>();
- const scalar_t *data_offset_ = data_offset.data<scalar_t>();
- const scalar_t *data_mask_ = data_mask.data<scalar_t>();
- scalar_t *grad_im_ = grad_im.data<scalar_t>();
+ const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+ scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();
modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
@@ -844,12 +844,12 @@ void modulated_deformable_col2im_coord_cuda(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] {
- const scalar_t *data_col_ = data_col.data<scalar_t>();
- const scalar_t *data_im_ = data_im.data<scalar_t>();
- const scalar_t *data_offset_ = data_offset.data<scalar_t>();
- const scalar_t *data_mask_ = data_mask.data<scalar_t>();
- scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
- scalar_t *grad_mask_ = grad_mask.data<scalar_t>();
+ const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+ const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+ scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
+ scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();
modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
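
The same migration applies to the typed pointer accessors inside the AT_DISPATCH_FLOATING_TYPES_AND_HALF blocks: Tensor::data<T>() was deprecated and later removed in favor of Tensor::data_ptr<T>(), which returns the same typed raw pointer. A minimal host-side sketch of the dispatch + raw-pointer idiom under that assumption (scale_inplace is illustrative, not an AlphaPose function):

#include <torch/extension.h>

// Illustrative only: mirrors the AT_DISPATCH + data_ptr<scalar_t>() pattern
// used throughout the .cu files above, but runs on a contiguous CPU tensor.
void scale_inplace(at::Tensor t, double factor) {
  TORCH_CHECK(t.is_contiguous(), "t must be contiguous");
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      t.scalar_type(), "scale_inplace", ([&] {
        scalar_t *p = t.data_ptr<scalar_t>();  // typed, non-deprecated accessor
        for (int64_t i = 0; i < t.numel(); ++i) {
          p[i] = static_cast<scalar_t>(static_cast<double>(p[i]) * factor);
        }
      }));
}
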
diff --git a/alphapose/models/layers/dcn/src/deform_pool_cuda.cpp b/alphapose/models/layers/dcn/src/deform_pool_cuda.cpp
index 9e0e3ffc..f6f087b8 100644
--- a/alphapose/models/layers/dcn/src/deform_pool_cuda.cpp
+++ b/alphapose/models/layers/dcn/src/deform_pool_cuda.cpp
@@ -33,7 +33,7 @@ void deform_psroi_pooling_cuda_forward(
at::Tensor top_count, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std) {
- AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+ TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
at::DeviceGuard guard(input.device());
const int batch = input.size(0);
@@ -59,8 +59,8 @@ void deform_psroi_pooling_cuda_backward(
const int no_trans, const float spatial_scale, const int output_dim,
const int group_size, const int pooled_size, const int part_size,
const int sample_per_part, const float trans_std) {
- AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
- AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+ TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
+ TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
at::DeviceGuard guard(input.device());
const int batch = input.size(0);
diff --git a/alphapose/models/layers/dcn/src/deform_pool_cuda_kernel.cu b/alphapose/models/layers/dcn/src/deform_pool_cuda_kernel.cu
index 1922d724..b0aa5165 100644
--- a/alphapose/models/layers/dcn/src/deform_pool_cuda_kernel.cu
+++ b/alphapose/models/layers/dcn/src/deform_pool_cuda_kernel.cu
@@ -290,11 +290,11 @@ void DeformablePSROIPoolForward(const at::Tensor data,
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data.scalar_type(), "deformable_psroi_pool_forward", ([&] {
- const scalar_t *bottom_data = data.data<scalar_t>();
- const scalar_t *bottom_rois = bbox.data<scalar_t>();
- const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
- scalar_t *top_data = out.data<scalar_t>();
- scalar_t *top_count_data = top_count.data<scalar_t>();
+ const scalar_t *bottom_data = data.data_ptr<scalar_t>();
+ const scalar_t *bottom_rois = bbox.data_ptr<scalar_t>();
+ const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr<scalar_t>();
+ scalar_t *top_data = out.data_ptr<scalar_t>();
+ scalar_t *top_count_data = top_count.data_ptr<scalar_t>();
DeformablePSROIPoolForwardKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
count, bottom_data, (scalar_t)spatial_scale, channels, height, width, pooled_height, pooled_width,
@@ -341,13 +341,13 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
out_grad.scalar_type(), "deformable_psroi_pool_backward_acc", ([&] {
- const scalar_t *top_diff = out_grad.data<scalar_t>();
- const scalar_t *bottom_data = data.data<scalar_t>();
- const scalar_t *bottom_rois = bbox.data<scalar_t>();
- const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
- scalar_t *bottom_data_diff = in_grad.data<scalar_t>();
- scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data<scalar_t>();
- const scalar_t *top_count_data = top_count.data<scalar_t>();
+ const scalar_t *top_diff = out_grad.data_ptr<scalar_t>();
+ const scalar_t *bottom_data = data.data_ptr<scalar_t>();
+ const scalar_t *bottom_rois = bbox.data_ptr<scalar_t>();
+ const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr<scalar_t>();
+ scalar_t *bottom_data_diff = in_grad.data_ptr<scalar_t>();
+ scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data_ptr<scalar_t>();
+ const scalar_t *top_count_data = top_count.data_ptr<scalar_t>();
DeformablePSROIPoolBackwardAccKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
count, top_diff, top_count_data, num_rois, (scalar_t)spatial_scale, channels, height, width,
@@ -361,4 +361,4 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
{
printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err));
}
-}
\ No newline at end of file
+}
diff --git a/alphapose/utils/roi_align/src/roi_align_cuda.cpp b/alphapose/utils/roi_align/src/roi_align_cuda.cpp
index 4e608161..eaa5d2de 100644
--- a/alphapose/utils/roi_align/src/roi_align_cuda.cpp
+++ b/alphapose/utils/roi_align/src/roi_align_cuda.cpp
@@ -17,9 +17,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
const int pooled_height, const int pooled_width,
at::Tensor bottom_grad);
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
- AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+ TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
@@ -82,4 +82,4 @@ int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)");
m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)");
-}
\ No newline at end of file
+}
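
The CHECK_CUDA rewrite reflects a second deprecation: Tensor::type() (DeprecatedTypeProperties) is being phased out, so device placement is now queried through Tensor::device().is_cuda() or the Tensor::is_cuda() shorthand, as in nms_kernel.cu below. A minimal sketch of the guard the macro expands to, assuming the same extension setup; check_input is illustrative only:

#include <torch/extension.h>

// Illustrative only: the device/contiguity guard behind CHECK_INPUT above.
void check_input(const at::Tensor &x) {
  TORCH_CHECK(x.device().is_cuda(), "x must be a CUDA tensor");  // was x.type().is_cuda()
  TORCH_CHECK(x.is_contiguous(), "x must be contiguous");
}
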
diff --git a/alphapose/utils/roi_align/src/roi_align_kernel.cu b/alphapose/utils/roi_align/src/roi_align_kernel.cu
index b914fd1e..c34cec21 100644
--- a/alphapose/utils/roi_align/src/roi_align_kernel.cu
+++ b/alphapose/utils/roi_align/src/roi_align_kernel.cu
@@ -132,9 +132,9 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
const int output_size = num_rois * pooled_height * pooled_width * channels;
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
features.scalar_type(), "ROIAlignLaucherForward", ([&] {
- const scalar_t *bottom_data = features.data<scalar_t>();
- const scalar_t *rois_data = rois.data<scalar_t>();
- scalar_t *top_data = output.data<scalar_t>();
+ const scalar_t *bottom_data = features.data_ptr<scalar_t>();
+ const scalar_t *rois_data = rois.data_ptr<scalar_t>();
+ scalar_t *top_data = output.data_ptr<scalar_t>();
ROIAlignForward<scalar_t>
    <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
@@ -275,9 +275,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
top_grad.scalar_type(), "ROIAlignLaucherBackward", ([&] {
- const scalar_t *top_diff = top_grad.data<scalar_t>();
- const scalar_t *rois_data = rois.data<scalar_t>();
- scalar_t *bottom_diff = bottom_grad.data<scalar_t>();
+ const scalar_t *top_diff = top_grad.data_ptr<scalar_t>();
+ const scalar_t *rois_data = rois.data_ptr<scalar_t>();
+ scalar_t *bottom_diff = bottom_grad.data_ptr<scalar_t>();
if (sizeof(scalar_t) == sizeof(double)) {
fprintf(stderr, "double is not supported\n");
exit(-1);
@@ -291,4 +291,4 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
}));
THCudaCheck(cudaGetLastError());
return 1;
-}
\ No newline at end of file
+}
diff --git a/detector/nms/src/nms_cpu.cpp b/detector/nms/src/nms_cpu.cpp
index f7cffb49..31e984e6 100644
--- a/detector/nms/src/nms_cpu.cpp
+++ b/detector/nms/src/nms_cpu.cpp
@@ -3,7 +3,7 @@
template <typename scalar_t>
at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
- AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
+ AT_ASSERTM(!dets.device().is_cuda(), "dets must be a CPU tensor");
if (dets.numel() == 0) {
return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
@@ -23,13 +23,13 @@ at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
at::Tensor suppressed_t =
at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
- auto suppressed = suppressed_t.data<uint8_t>();
- auto order = order_t.data<int64_t>();
- auto x1 = x1_t.data<scalar_t>();
- auto y1 = y1_t.data<scalar_t>();
- auto x2 = x2_t.data<scalar_t>();
- auto y2 = y2_t.data<scalar_t>();
- auto areas = areas_t.data<scalar_t>();
+ auto suppressed = suppressed_t.data_ptr<uint8_t>();
+ auto order = order_t.data_ptr<int64_t>();
+ auto x1 = x1_t.data_ptr<scalar_t>();
+ auto y1 = y1_t.data_ptr<scalar_t>();
+ auto x2 = x2_t.data_ptr<scalar_t>();
+ auto y2 = y2_t.data_ptr<scalar_t>();
+ auto areas = areas_t.data_ptr<scalar_t>();
for (int64_t _i = 0; _i < ndets; _i++) {
auto i = order[_i];
@@ -68,4 +68,4 @@ at::Tensor nms(const at::Tensor& dets, const float threshold) {
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("nms", &nms, "non-maximum suppression");
-}
\ No newline at end of file
+}
diff --git a/detector/nms/src/nms_cuda.cpp b/detector/nms/src/nms_cuda.cpp
index 0ea6f9b5..8bb650e3 100644
--- a/detector/nms/src/nms_cuda.cpp
+++ b/detector/nms/src/nms_cuda.cpp
@@ -1,7 +1,7 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include <torch/extension.h>
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
@@ -14,4 +14,4 @@ at::Tensor nms(const at::Tensor& dets, const float threshold) {
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("nms", &nms, "non-maximum suppression");
-}
\ No newline at end of file
+}
diff --git a/detector/nms/src/nms_kernel.cu b/detector/nms/src/nms_kernel.cu
index 9254f2ad..166edecd 100644
--- a/detector/nms/src/nms_kernel.cu
+++ b/detector/nms/src/nms_kernel.cu
@@ -69,7 +69,7 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
// boxes is a N x 5 tensor
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
using scalar_t = float;
- AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
+ AT_ASSERTM(boxes.is_cuda(), "boxes must be a CUDA tensor");
auto scores = boxes.select(1, 4);
auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
auto boxes_sorted = boxes.index_select(0, order_t);
@@ -78,7 +78,7 @@ at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
- scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
+ scalar_t* boxes_dev = boxes_sorted.data_ptr<scalar_t>();
THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
@@ -106,7 +106,7 @@ at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
- int64_t* keep_out = keep.data<int64_t>();
+ int64_t* keep_out = keep.data_ptr<int64_t>();
int num_to_keep = 0;
for (int i = 0; i < boxes_num; i++) {
@@ -128,4 +128,4 @@ at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
order_t.device(), keep.scalar_type())
}).sort(0, false));
-}
\ No newline at end of file
+}
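
With these API updates in place, the bundled extensions (dcn, roi_align, nms) should again compile against current PyTorch releases; they have to be rebuilt before the changes take effect, which for AlphaPose is typically done with `python setup.py build develop` from the repository root.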