Merge branch 'main' into HOLD_GIL

facebookresearch · Oct 4, 2024 · afea80b · afea80b
2 parents 68f8fc4 + 5dffc1b
commit afea80b
Show file tree

Hide file tree

Showing 8 changed files with 63 additions and 76 deletions.
diff --git a/src/binding/core/conversion.cpp b/src/binding/core/conversion.cpp
@@ -17,18 +17,14 @@
 
 #include "gil.h"
 
-#include <cstring>
-
 namespace nb = nanobind;
 
 namespace spdl::core {
 namespace {
 template <MediaType media_type>
-CPUBufferPtr convert(
-    const FFmpegFramesPtr<media_type>&& frames,
-    bool pin_memory) {
+CPUBufferPtr convert(const FFmpegFramesPtr<media_type>&& frames) {
   RELEASE_GIL();
-  return convert_frames(frames.get(), pin_memory);
+  return convert_frames(frames.get());
 }
 
 template <MediaType media_type>
@@ -42,48 +38,22 @@ std::vector<const spdl::core::FFmpegFrames<media_type>*> _ref(
 }
 
 template <MediaType media_type>
-CPUBufferPtr batch_convert(
-    std::vector<FFmpegFramesPtr<media_type>>&& frames,
-    bool pin_memory) {
+CPUBufferPtr batch_convert(std::vector<FFmpegFramesPtr<media_type>>&& frames) {
   RELEASE_GIL();
-  return convert_frames(_ref(frames), pin_memory);
+  return convert_frames(_ref(frames));
 }
 } // namespace
 
 void register_conversion(nb::module_& m) {
   ////////////////////////////////////////////////////////////////////////////////
   // Frame conversion
   ////////////////////////////////////////////////////////////////////////////////
-  m.def(
-      "convert_frames",
-      &convert<MediaType::Audio>,
-      nb::arg("frames"),
-      nb::arg("pin_memory") = false);
-  m.def(
-      "convert_frames",
-      &convert<MediaType::Video>,
-      nb::arg("frames"),
-      nb::arg("pin_memory") = false);
-  m.def(
-      "convert_frames",
-      &convert<MediaType::Image>,
-      nb::arg("frames"),
-      nb::arg("pin_memory") = false);
+  m.def("convert_frames", &convert<MediaType::Audio>, nb::arg("frames"));
+  m.def("convert_frames", &convert<MediaType::Video>, nb::arg("frames"));
+  m.def("convert_frames", &convert<MediaType::Image>, nb::arg("frames"));
 
-  m.def(
-      "convert_frames",
-      &batch_convert<MediaType::Audio>,
-      nb::arg("frames"),
-      nb::arg("pin_memory") = false);
-  m.def(
-      "convert_frames",
-      &batch_convert<MediaType::Video>,
-      nb::arg("frames"),
-      nb::arg("pin_memory") = false);
-  m.def(
-      "convert_frames",
-      &batch_convert<MediaType::Image>,
-      nb::arg("frames"),
-      nb::arg("pin_memory") = false);
+  m.def("convert_frames", &batch_convert<MediaType::Audio>, nb::arg("frames"));
+  m.def("convert_frames", &batch_convert<MediaType::Video>, nb::arg("frames"));
+  m.def("convert_frames", &batch_convert<MediaType::Image>, nb::arg("frames"));
 }
 } // namespace spdl::core
diff --git a/src/libspdl/core/buffer.h b/src/libspdl/core/buffer.h
@@ -27,7 +27,7 @@ struct CUDABuffer;
 using CPUBufferPtr = std::unique_ptr<CPUBuffer>;
 using CUDABufferPtr = std::unique_ptr<CUDABuffer>;
 
-/// Abstract base buffer class (to be exposed to Python)
+/// Abstract base buffer class (technically not needed)
 /// Represents contiguous array memory.
 struct Buffer {
   ///
@@ -40,45 +40,44 @@ struct Buffer {
   /// Size of unit element
   size_t depth = sizeof(uint8_t);
 
-  ///
-  /// The actual data.
-  std::shared_ptr<Storage> storage;
-
-  Buffer(
-      std::vector<size_t> shape,
-      ElemClass elem_class,
-      size_t depth,
-      Storage* storage);
+  Buffer(std::vector<size_t> shape, ElemClass elem_class, size_t depth);
   virtual ~Buffer() = default;
 
   ///
   /// Returns the pointer to the head of the data buffer.
-  void* data();
+  virtual void* data() = 0;
 };
 
 ///
 /// Contiguous array data on CPU memory.
 struct CPUBuffer : public Buffer {
+  std::shared_ptr<CPUStorage> storage;
+
   CPUBuffer(
       const std::vector<size_t>& shape,
       ElemClass elem_class,
       size_t depth,
-      CPUStorage* storage);
+      std::shared_ptr<CPUStorage> storage);
+
+  void* data() override;
 };
 
 ///
 /// Contiguous array data on a CUDA device.
 struct CUDABuffer : Buffer {
 #ifdef SPDL_USE_CUDA
+  std::shared_ptr<CUDAStorage> storage;
   int device_index;
 
   CUDABuffer(
       std::vector<size_t> shape,
       ElemClass elem_class,
       size_t depth,
-      CUDAStorage* storage,
+      std::shared_ptr<CUDAStorage> storage,
       int device_index);
 
+  void* data() override;
+
   uintptr_t get_cuda_stream() const;
 
 #endif

diff --git a/src/libspdl/core/buffer/cpu.cpp b/src/libspdl/core/buffer/cpu.cpp
@@ -18,19 +18,8 @@ namespace spdl::core {
 ////////////////////////////////////////////////////////////////////////////////
 // Buffer
 ////////////////////////////////////////////////////////////////////////////////
-Buffer::Buffer(
-    std::vector<size_t> shape_,
-    ElemClass elem_class_,
-    size_t depth_,
-    Storage* storage_)
-    : shape(std::move(shape_)),
-      elem_class(elem_class_),
-      depth(depth_),
-      storage(storage_) {}
-
-void* Buffer::data() {
-  return storage->data();
-}
+Buffer::Buffer(std::vector<size_t> shape_, ElemClass elem_class_, size_t depth_)
+    : shape(std::move(shape_)), elem_class(elem_class_), depth(depth_) {}
 
 ////////////////////////////////////////////////////////////////////////////////
 // CPUBuffer
@@ -39,8 +28,12 @@ CPUBuffer::CPUBuffer(
     const std::vector<size_t>& shape_,
     ElemClass elem_class_,
     size_t depth_,
-    CPUStorage* storage_)
-    : Buffer(shape_, elem_class_, depth_, (Storage*)storage_) {}
+    std::shared_ptr<CPUStorage> storage_)
+    : Buffer(shape_, elem_class_, depth_), storage(std::move(storage_)) {}
+
+void* CPUBuffer::data() {
+  return storage->data();
+}
 
 ////////////////////////////////////////////////////////////////////////////////
 // Factory functions
@@ -67,7 +60,7 @@ std::unique_ptr<CPUBuffer> cpu_buffer(
       fmt::join(shape, ", "),
       depth);
   return std::make_unique<CPUBuffer>(
-      shape, elem_class, depth, new CPUStorage{size, pin_memory});
+      shape, elem_class, depth, std::make_shared<CPUStorage>(size, pin_memory));
 }
 
 } // namespace spdl::core
diff --git a/src/libspdl/core/buffer/cuda.cpp b/src/libspdl/core/buffer/cuda.cpp
@@ -17,11 +17,16 @@ CUDABuffer::CUDABuffer(
     std::vector<size_t> shape_,
     ElemClass elem_class_,
     size_t depth_,
-    CUDAStorage* storage_,
+    std::shared_ptr<CUDAStorage> storage_,
     int device_index_)
-    : Buffer(std::move(shape_), elem_class_, depth_, (Storage*)storage_),
+    : Buffer(std::move(shape_), elem_class_, depth_),
+      storage(std::move(storage_)),
       device_index(device_index_) {}
 
+void* CUDABuffer::data() {
+  return storage->data();
+}
+
 uintptr_t CUDABuffer::get_cuda_stream() const {
   return (uintptr_t)(((CUDAStorage*)(storage.get()))->stream);
 }
@@ -48,7 +53,7 @@ CUDABufferPtr cuda_buffer(
       shape,
       elem_class,
       depth,
-      new CUDAStorage{depth * prod(shape), cfg},
+      std::make_shared<CUDAStorage>(depth * prod(shape), cfg),
       cfg.device_index);
 }
 

diff --git a/src/libspdl/core/conversion.h b/src/libspdl/core/conversion.h
@@ -15,6 +15,8 @@
 
 namespace spdl::core {
 
+// The actual implementation is found in
+// detail/ffmpeg/conversion.cpp
 template <MediaType media_type>
 CPUBufferPtr convert_frames(
     const std::vector<const FFmpegFrames<media_type>*>& batch,

diff --git a/src/libspdl/core/storage/cpu.cpp b/src/libspdl/core/storage/cpu.cpp
@@ -33,6 +33,8 @@ CPUStorage::CPUStorage(size_t size, bool pin_memory) {
     LOG(WARNING)
         << "`pin_memory` requires SPDL with CUDA support. Falling back to CPU memory.";
 #else
+    LOG(WARNING)
+        << "`pin_memory` is under development and is currently known to be slower and unstable";
     CHECK_CUDA(
         cudaHostAlloc(&data_, size, cudaHostAllocDefault),
         "Failed to allocate pinned memory.");

diff --git a/src/spdl/io/_composite.py b/src/spdl/io/_composite.py
@@ -420,7 +420,6 @@ async def async_load_image_batch(
     decode_config: DecodeConfig | None = None,
     filter_desc: str | None = _FILTER_DESC_DEFAULT,
     device_config: None = None,
-    pin_memory: bool = False,
     strict: bool = True,
     **kwargs,
 ) -> CPUBuffer: ...
@@ -437,7 +436,6 @@ async def async_load_image_batch(
     decode_config: DecodeConfig | None = None,
     filter_desc: str | None = _FILTER_DESC_DEFAULT,
     device_config: CUDAConfig,
-    pin_memory: bool = False,
     strict: bool = True,
     **kwargs,
 ) -> CUDABuffer: ...
@@ -453,7 +451,6 @@ async def async_load_image_batch(
     decode_config=None,
     filter_desc=_FILTER_DESC_DEFAULT,
     device_config=None,
-    pin_memory=False,
     strict=True,
     **kwargs,
 ):
@@ -544,6 +541,13 @@ async def async_load_image_batch(
     if not srcs:
         raise ValueError("`srcs` must not be empty.")
 
+    if "pin_memory" in kwargs:
+        warnings.warn(
+            "`pin_memory` argument has been removed. Use `storage` instead.",
+            stacklevel=2,
+        )
+        kwargs.pop("pin_memory")
+
     if device_config is None and "cuda_config" in kwargs:
         warnings.warn(
             "The `cuda_config` argument has ben renamed to `device_config`.",
@@ -582,7 +586,7 @@ async def async_load_image_batch(
     if not frames:
         raise RuntimeError("Failed to load all the images.")
 
-    buffer = await _core.async_convert_frames(frames, pin_memory=pin_memory)
+    buffer = await _core.async_convert_frames(frames)
 
     if device_config is not None:
         buffer = await _core.async_transfer_buffer(buffer, device_config=device_config)

diff --git a/src/spdl/io/_core.py b/src/spdl/io/_core.py
@@ -552,6 +552,12 @@ def convert_frames(
             - ``W``: width
             - ``H``: height
     """
+    if "pin_memory" in kwargs:
+        warnings.warn(
+            "`pin_memory` argument has been removed. Use `storage` instead.",
+            stacklevel=2,
+        )
+        kwargs.pop("pin_memory")
     return _libspdl.convert_frames(frames, **kwargs)
 
 
@@ -567,6 +573,12 @@ async def async_convert_frames(
     **kwargs,
 ) -> CPUBuffer:
     """Async version of :py:func:`~spdl.io.convert_frames`."""
+    if "pin_memory" in kwargs:
+        warnings.warn(
+            "`pin_memory` argument has been removed. Use `storage` instead.",
+            stacklevel=2,
+        )
+        kwargs.pop("pin_memory")
     return await run_async(convert_frames, frames, **kwargs)