Skip to content

Commit

Permalink
Merge branch 'main' into HOLD_GIL
Browse files Browse the repository at this point in the history
  • Loading branch information
mthrok authored Oct 4, 2024
2 parents 68f8fc4 + 5dffc1b commit afea80b
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 76 deletions.
50 changes: 10 additions & 40 deletions src/binding/core/conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,14 @@

#include "gil.h"

#include <cstring>

namespace nb = nanobind;

namespace spdl::core {
namespace {
template <MediaType media_type>
CPUBufferPtr convert(
const FFmpegFramesPtr<media_type>&& frames,
bool pin_memory) {
CPUBufferPtr convert(const FFmpegFramesPtr<media_type>&& frames) {
RELEASE_GIL();
return convert_frames(frames.get(), pin_memory);
return convert_frames(frames.get());
}

template <MediaType media_type>
Expand All @@ -42,48 +38,22 @@ std::vector<const spdl::core::FFmpegFrames<media_type>*> _ref(
}

template <MediaType media_type>
CPUBufferPtr batch_convert(
std::vector<FFmpegFramesPtr<media_type>>&& frames,
bool pin_memory) {
CPUBufferPtr batch_convert(std::vector<FFmpegFramesPtr<media_type>>&& frames) {
RELEASE_GIL();
return convert_frames(_ref(frames), pin_memory);
return convert_frames(_ref(frames));
}
} // namespace

void register_conversion(nb::module_& m) {
////////////////////////////////////////////////////////////////////////////////
// Frame conversion
////////////////////////////////////////////////////////////////////////////////
m.def(
"convert_frames",
&convert<MediaType::Audio>,
nb::arg("frames"),
nb::arg("pin_memory") = false);
m.def(
"convert_frames",
&convert<MediaType::Video>,
nb::arg("frames"),
nb::arg("pin_memory") = false);
m.def(
"convert_frames",
&convert<MediaType::Image>,
nb::arg("frames"),
nb::arg("pin_memory") = false);
m.def("convert_frames", &convert<MediaType::Audio>, nb::arg("frames"));
m.def("convert_frames", &convert<MediaType::Video>, nb::arg("frames"));
m.def("convert_frames", &convert<MediaType::Image>, nb::arg("frames"));

m.def(
"convert_frames",
&batch_convert<MediaType::Audio>,
nb::arg("frames"),
nb::arg("pin_memory") = false);
m.def(
"convert_frames",
&batch_convert<MediaType::Video>,
nb::arg("frames"),
nb::arg("pin_memory") = false);
m.def(
"convert_frames",
&batch_convert<MediaType::Image>,
nb::arg("frames"),
nb::arg("pin_memory") = false);
m.def("convert_frames", &batch_convert<MediaType::Audio>, nb::arg("frames"));
m.def("convert_frames", &batch_convert<MediaType::Video>, nb::arg("frames"));
m.def("convert_frames", &batch_convert<MediaType::Image>, nb::arg("frames"));
}
} // namespace spdl::core
25 changes: 12 additions & 13 deletions src/libspdl/core/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct CUDABuffer;
using CPUBufferPtr = std::unique_ptr<CPUBuffer>;
using CUDABufferPtr = std::unique_ptr<CUDABuffer>;

/// Abstract base buffer class (to be exposed to Python)
/// Abstract base buffer class (technically not needed)
/// Represents contiguous array memory.
struct Buffer {
///
Expand All @@ -40,45 +40,44 @@ struct Buffer {
/// Size of unit element
size_t depth = sizeof(uint8_t);

///
/// The actual data.
std::shared_ptr<Storage> storage;

Buffer(
std::vector<size_t> shape,
ElemClass elem_class,
size_t depth,
Storage* storage);
Buffer(std::vector<size_t> shape, ElemClass elem_class, size_t depth);
virtual ~Buffer() = default;

///
/// Returns the pointer to the head of the data buffer.
void* data();
virtual void* data() = 0;
};

///
/// Contiguous array data on CPU memory.
struct CPUBuffer : public Buffer {
std::shared_ptr<CPUStorage> storage;

CPUBuffer(
const std::vector<size_t>& shape,
ElemClass elem_class,
size_t depth,
CPUStorage* storage);
std::shared_ptr<CPUStorage> storage);

void* data() override;
};

///
/// Contiguous array data on a CUDA device.
struct CUDABuffer : Buffer {
#ifdef SPDL_USE_CUDA
std::shared_ptr<CUDAStorage> storage;
int device_index;

CUDABuffer(
std::vector<size_t> shape,
ElemClass elem_class,
size_t depth,
CUDAStorage* storage,
std::shared_ptr<CUDAStorage> storage,
int device_index);

void* data() override;

uintptr_t get_cuda_stream() const;

#endif
Expand Down
25 changes: 9 additions & 16 deletions src/libspdl/core/buffer/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,8 @@ namespace spdl::core {
////////////////////////////////////////////////////////////////////////////////
// Buffer
////////////////////////////////////////////////////////////////////////////////
Buffer::Buffer(
std::vector<size_t> shape_,
ElemClass elem_class_,
size_t depth_,
Storage* storage_)
: shape(std::move(shape_)),
elem_class(elem_class_),
depth(depth_),
storage(storage_) {}

void* Buffer::data() {
return storage->data();
}
Buffer::Buffer(std::vector<size_t> shape_, ElemClass elem_class_, size_t depth_)
: shape(std::move(shape_)), elem_class(elem_class_), depth(depth_) {}

////////////////////////////////////////////////////////////////////////////////
// CPUBuffer
Expand All @@ -39,8 +28,12 @@ CPUBuffer::CPUBuffer(
const std::vector<size_t>& shape_,
ElemClass elem_class_,
size_t depth_,
CPUStorage* storage_)
: Buffer(shape_, elem_class_, depth_, (Storage*)storage_) {}
std::shared_ptr<CPUStorage> storage_)
: Buffer(shape_, elem_class_, depth_), storage(std::move(storage_)) {}

void* CPUBuffer::data() {
return storage->data();
}

////////////////////////////////////////////////////////////////////////////////
// Factory functions
Expand All @@ -67,7 +60,7 @@ std::unique_ptr<CPUBuffer> cpu_buffer(
fmt::join(shape, ", "),
depth);
return std::make_unique<CPUBuffer>(
shape, elem_class, depth, new CPUStorage{size, pin_memory});
shape, elem_class, depth, std::make_shared<CPUStorage>(size, pin_memory));
}

} // namespace spdl::core
11 changes: 8 additions & 3 deletions src/libspdl/core/buffer/cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,16 @@ CUDABuffer::CUDABuffer(
std::vector<size_t> shape_,
ElemClass elem_class_,
size_t depth_,
CUDAStorage* storage_,
std::shared_ptr<CUDAStorage> storage_,
int device_index_)
: Buffer(std::move(shape_), elem_class_, depth_, (Storage*)storage_),
: Buffer(std::move(shape_), elem_class_, depth_),
storage(std::move(storage_)),
device_index(device_index_) {}

void* CUDABuffer::data() {
return storage->data();
}

uintptr_t CUDABuffer::get_cuda_stream() const {
return (uintptr_t)(((CUDAStorage*)(storage.get()))->stream);
}
Expand All @@ -48,7 +53,7 @@ CUDABufferPtr cuda_buffer(
shape,
elem_class,
depth,
new CUDAStorage{depth * prod(shape), cfg},
std::make_shared<CUDAStorage>(depth * prod(shape), cfg),
cfg.device_index);
}

Expand Down
2 changes: 2 additions & 0 deletions src/libspdl/core/conversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

namespace spdl::core {

// The actual implementation is found in
// detail/ffmpeg/conversion.cpp
template <MediaType media_type>
CPUBufferPtr convert_frames(
const std::vector<const FFmpegFrames<media_type>*>& batch,
Expand Down
2 changes: 2 additions & 0 deletions src/libspdl/core/storage/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ CPUStorage::CPUStorage(size_t size, bool pin_memory) {
LOG(WARNING)
<< "`pin_memory` requires SPDL with CUDA support. Falling back to CPU memory.";
#else
LOG(WARNING)
<< "`pin_memory` is under development and is currently known to be slower and unstable";
CHECK_CUDA(
cudaHostAlloc(&data_, size, cudaHostAllocDefault),
"Failed to allocate pinned memory.");
Expand Down
12 changes: 8 additions & 4 deletions src/spdl/io/_composite.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,6 @@ async def async_load_image_batch(
decode_config: DecodeConfig | None = None,
filter_desc: str | None = _FILTER_DESC_DEFAULT,
device_config: None = None,
pin_memory: bool = False,
strict: bool = True,
**kwargs,
) -> CPUBuffer: ...
Expand All @@ -437,7 +436,6 @@ async def async_load_image_batch(
decode_config: DecodeConfig | None = None,
filter_desc: str | None = _FILTER_DESC_DEFAULT,
device_config: CUDAConfig,
pin_memory: bool = False,
strict: bool = True,
**kwargs,
) -> CUDABuffer: ...
Expand All @@ -453,7 +451,6 @@ async def async_load_image_batch(
decode_config=None,
filter_desc=_FILTER_DESC_DEFAULT,
device_config=None,
pin_memory=False,
strict=True,
**kwargs,
):
Expand Down Expand Up @@ -544,6 +541,13 @@ async def async_load_image_batch(
if not srcs:
raise ValueError("`srcs` must not be empty.")

if "pin_memory" in kwargs:
warnings.warn(
"`pin_memory` argument has been removed. Use `storage` instead.",
stacklevel=2,
)
kwargs.pop("pin_memory")

if device_config is None and "cuda_config" in kwargs:
warnings.warn(
"The `cuda_config` argument has ben renamed to `device_config`.",
Expand Down Expand Up @@ -582,7 +586,7 @@ async def async_load_image_batch(
if not frames:
raise RuntimeError("Failed to load all the images.")

buffer = await _core.async_convert_frames(frames, pin_memory=pin_memory)
buffer = await _core.async_convert_frames(frames)

if device_config is not None:
buffer = await _core.async_transfer_buffer(buffer, device_config=device_config)
Expand Down
12 changes: 12 additions & 0 deletions src/spdl/io/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,12 @@ def convert_frames(
- ``W``: width
- ``H``: height
"""
if "pin_memory" in kwargs:
warnings.warn(
"`pin_memory` argument has been removed. Use `storage` instead.",
stacklevel=2,
)
kwargs.pop("pin_memory")
return _libspdl.convert_frames(frames, **kwargs)


Expand All @@ -567,6 +573,12 @@ async def async_convert_frames(
**kwargs,
) -> CPUBuffer:
"""Async version of :py:func:`~spdl.io.convert_frames`."""
if "pin_memory" in kwargs:
warnings.warn(
"`pin_memory` argument has been removed. Use `storage` instead.",
stacklevel=2,
)
kwargs.pop("pin_memory")
return await run_async(convert_frames, frames, **kwargs)


Expand Down

0 comments on commit afea80b

Please sign in to comment.