diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 7c7fef53d9a..222e276bd11 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -12,6 +12,7 @@ concurrency:
 jobs:
   pr-builder:
     needs:
+      - changed-files
       - checks
       - conda-cpp-build
       - conda-cpp-checks
@@ -38,6 +39,63 @@ jobs:
       - integration-tests
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10
+    if: always()
+    with:
+      needs: ${{ toJSON(needs) }}
+  changed-files:  # classifies the PR's changed files; test jobs later in this workflow gate on its outputs
+    runs-on: ubuntu-latest
+    name: "Check changed files"
+    outputs:  # one flag per test suite; consumers compare each against the string 'true'
+      test_cpp: ${{ steps.changed-files.outputs.cpp_any_changed == 'true' }}
+      test_java: ${{ steps.changed-files.outputs.java_any_changed == 'true' }}
+      test_notebooks: ${{ steps.changed-files.outputs.notebooks_any_changed == 'true' }}
+      test_python: ${{ steps.changed-files.outputs.python_any_changed == 'true' }}
+    steps:
+      - name: Get PR info
+        id: get-pr-info  # its `pr-info` output is parsed as JSON by the steps below
+        uses: rapidsai/shared-actions/get-pr-info@main
+      - name: Checkout code repo
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.sha }}  # NOTE(review): verify `inputs.sha` is defined for this workflow's trigger
+          fetch-depth: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).commits }}  # NOTE(review): presumably the PR's commit count; confirm base_sha is reachable at this depth
+          persist-credentials: false
+      - name: Get changed files
+        id: changed-files  # the job outputs above read `<group>_any_changed` from this step
+        uses: tj-actions/changed-files@v45  # NOTE(review): third-party action referenced by a mutable tag; consider pinning to a commit SHA
+        with:  # groups in files_yaml: start from '**', then list '!'-prefixed paths (presumably exclusions — confirm against the action's docs)
+          base_sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
+          files_yaml: |
+            cpp:
+              - '**'
+              - '!CONTRIBUTING.md'
+              - '!README.md'
+              - '!docs/**'
+              - '!img/**'
+              - '!java/**'
+              - '!notebooks/**'
+              - '!python/**'
+            java:
+              - '**'
+              - '!CONTRIBUTING.md'
+              - '!README.md'
+              - '!docs/**'
+              - '!img/**'
+              - '!notebooks/**'
+              - '!python/**'
+            notebooks:
+              - '**'
+              - '!CONTRIBUTING.md'
+              - '!README.md'
+              - '!java/**'
+            python:
+              - '**'
+              - '!CONTRIBUTING.md'
+              - '!README.md'
+              - '!docs/**'
+              - '!img/**'
+              - '!java/**'
+              - '!notebooks/**'
   checks:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10
@@ -57,9 +115,10 @@ jobs:
       build_type: pull-request
       enable_check_symbols: true
   conda-cpp-tests:
-    needs: conda-cpp-build
+    needs: [conda-cpp-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_cpp == 'true'
     with:
       build_type: pull-request
   conda-python-build:
@@ -69,24 +128,27 @@ jobs:
     with:
       build_type: pull-request
   conda-python-cudf-tests:
-    needs: conda-python-build
+    needs: [conda-python-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       build_type: pull-request
       script: "ci/test_python_cudf.sh"
   conda-python-other-tests:
     # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
-    needs: conda-python-build
+    needs: [conda-python-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       build_type: pull-request
       script: "ci/test_python_other.sh"
   conda-java-tests:
-    needs: conda-cpp-build
+    needs: [conda-cpp-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_java == 'true'
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -104,9 +166,10 @@ jobs:
       container_image: "rapidsai/ci-wheel:latest"
       run_script: "ci/configure_cpp_static.sh"
   conda-notebook-tests:
-    needs: conda-python-build
+    needs: [conda-python-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_notebooks == 'true'
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -146,9 +209,10 @@ jobs:
       build_type: pull-request
       script: "ci/build_wheel_cudf.sh"
   wheel-tests-cudf:
-    needs: wheel-build-cudf
+    needs: [wheel-build-cudf, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       build_type: pull-request
       script: ci/test_wheel_cudf.sh
@@ -162,9 +226,10 @@ jobs:
       build_type: pull-request
       script: "ci/build_wheel_cudf_polars.sh"
   wheel-tests-cudf-polars:
-    needs: wheel-build-cudf-polars
+    needs: [wheel-build-cudf-polars, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -182,9 +247,10 @@ jobs:
       build_type: pull-request
       script: "ci/build_wheel_dask_cudf.sh"
   wheel-tests-dask-cudf:
-    needs: wheel-build-dask-cudf
+    needs: [wheel-build-dask-cudf, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -201,9 +267,10 @@ jobs:
         build-all -DBUILD_BENCHMARKS=ON --verbose;
         sccache -s;
   unit-tests-cudf-pandas:
-    needs: wheel-build-cudf
+    needs: [wheel-build-cudf, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -211,9 +278,10 @@ jobs:
       script: ci/cudf_pandas_scripts/run_tests.sh
   pandas-tests:
     # run the Pandas unit tests using PR branch
-    needs: wheel-build-cudf
+    needs: [wheel-build-cudf, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
+    if: needs.changed-files.outputs.test_python == 'true'
     with:
       # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1b17eae0842..f861fb57916 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -144,7 +144,7 @@ repos:
       - id: ruff-format
         files: python/.*$
   - repo: https://github.com/rapidsai/pre-commit-hooks
-    rev: v0.3.1
+    rev: v0.4.0
     hooks:
       - id: verify-copyright
         exclude: |
diff --git a/README.md b/README.md
index fd8b0365807..f1b010394d6 100644
--- a/README.md
+++ b/README.md
@@ -89,7 +89,7 @@ conda install -c rapidsai -c conda-forge -c nvidia \
 We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
 of our latest development branch.
 
-Note: cuDF is supported only on Linux, and with Python versions 3.9 and later.
+Note: cuDF is supported only on Linux, and with Python versions 3.10 and later.
 
 See the [RAPIDS installation guide](https://docs.rapids.ai/install) for more OS and version info.
 
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 5cf7508ba51..fcd6e27a7f6 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -76,7 +76,7 @@ dependencies:
 - pytest-xdist
 - pytest<8
 - python-confluent-kafka>=1.9.0,<1.10.0a0
-- python>=3.9,<3.12
+- python>=3.10,<3.12
 - pytorch>=2.1.0
 - rapids-build-backend>=0.3.0,<0.4.0.dev0
 - rapids-dask-dependency==24.10.*,>=0.0.0a0
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 28b927254f7..bedc3a90885 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -74,7 +74,7 @@ dependencies:
 - pytest-xdist
 - pytest<8
 - python-confluent-kafka>=1.9.0,<1.10.0a0
-- python>=3.9,<3.12
+- python>=3.10,<3.12
 - pytorch>=2.1.0
 - rapids-build-backend>=0.3.0,<0.4.0.dev0
 - rapids-dask-dependency==24.10.*,>=0.0.0a0
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index a6f72ed6b75..4080c5d02da 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -666,6 +666,7 @@ add_library(
   src/unary/math_ops.cu
   src/unary/nan_ops.cu
   src/unary/null_ops.cu
+  src/utilities/cuda.cpp
   src/utilities/cuda_memcpy.cu
   src/utilities/default_stream.cpp
   src/utilities/host_memory.cpp
diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh
index f7984b29d6b..75bbe8174d3 100644
--- a/cpp/benchmarks/join/generate_input_tables.cuh
+++ b/cpp/benchmarks/join/generate_input_tables.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/detail/utilities/cuda.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
@@ -150,13 +151,8 @@ void generate_input_tables(key_type* const build_tbl,
   CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
     &num_blocks_init_probe_tbl, init_probe_tbl<key_type, size_type>, block_size, 0));
 
-  int dev_id{-1};
-  CUDF_CUDA_TRY(cudaGetDevice(&dev_id));
-
-  int num_sms{-1};
-  CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));
-
-  int const num_states =
+  auto const num_sms = cudf::detail::num_multiprocessors();
+  auto const num_states =
     num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size;
   rmm::device_uvector<curandState> devStates(num_states, cudf::get_default_stream());
 
diff --git a/cpp/benchmarks/reduction/minmax.cpp b/cpp/benchmarks/reduction/minmax.cpp
index c89e22d3f44..636de303cc4 100644
--- a/cpp/benchmarks/reduction/minmax.cpp
+++ b/cpp/benchmarks/reduction/minmax.cpp
@@ -47,6 +47,8 @@ static void reduction_minmax(nvbench::state& state, nvbench::type_list<DataType>
   set_throughputs(state);
 }
 
+NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
+
 using Types = nvbench::type_list<bool, int8_t, int32_t, float, cudf::timestamp_ms>;
 
 NVBENCH_BENCH_TYPES(reduction_minmax, NVBENCH_TYPE_AXES(Types))
diff --git a/cpp/benchmarks/reduction/reduce.cpp b/cpp/benchmarks/reduction/reduce.cpp
index 14bf90c4943..a30c27c519c 100644
--- a/cpp/benchmarks/reduction/reduce.cpp
+++ b/cpp/benchmarks/reduction/reduce.cpp
@@ -81,6 +81,8 @@ static void reduction(nvbench::state& state, nvbench::type_list<DataType, nvbenc
   set_throughputs(state);
 }
 
+NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
+
 using Types    = nvbench::type_list<int32_t, int64_t, double, cudf::timestamp_ms>;
 using AggKinds = nvbench::enum_type_list<cudf::reduce_aggregation::MIN,
                                          cudf::reduce_aggregation::SUM,
diff --git a/cpp/benchmarks/sort/rank_lists.cpp b/cpp/benchmarks/sort/rank_lists.cpp
index fbdb40b3537..7015fe08089 100644
--- a/cpp/benchmarks/sort/rank_lists.cpp
+++ b/cpp/benchmarks/sort/rank_lists.cpp
@@ -37,6 +37,8 @@ void nvbench_rank_lists(nvbench::state& state, nvbench::type_list<nvbench::enum_
                cudf::order::ASCENDING,
                null_frequency ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE,
                cudf::null_order::AFTER,
+               false,
+               cudf::get_default_stream(),
                rmm::mr::get_current_device_resource());
   });
 }
diff --git a/cpp/benchmarks/sort/rank_structs.cpp b/cpp/benchmarks/sort/rank_structs.cpp
index 4b0da29df9d..8b4b09464d8 100644
--- a/cpp/benchmarks/sort/rank_structs.cpp
+++ b/cpp/benchmarks/sort/rank_structs.cpp
@@ -35,6 +35,8 @@ void nvbench_rank_structs(nvbench::state& state, nvbench::type_list<nvbench::enu
                cudf::order::ASCENDING,
                nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE,
                cudf::null_order::AFTER,
+               false,
+               cudf::get_default_stream(),
                rmm::mr::get_current_device_resource());
   });
 }
diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake
index 0afdc526981..e3e6a07661a 100644
--- a/cpp/cmake/thirdparty/get_arrow.cmake
+++ b/cpp/cmake/thirdparty/get_arrow.cmake
@@ -45,7 +45,7 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
     APPEND
     initial_code_block
     [=[
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
+find_package(Python 3.10 REQUIRED COMPONENTS Interpreter)
 execute_process(
     COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])"
     OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR
diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh
index b6310e6cd2f..4071fa01fb2 100644
--- a/cpp/include/cudf/detail/copy_if.cuh
+++ b/cpp/include/cudf/detail/copy_if.cuh
@@ -22,6 +22,7 @@
 #include <cudf/detail/gather.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/detail/utilities/cuda.hpp>
 #include <cudf/null_mask.hpp>
 #include <cudf/strings/string_view.cuh>
 #include <cudf/table/table.hpp>
diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh
index ec7b1c3e6b6..f0510c86c3a 100644
--- a/cpp/include/cudf/detail/indexalator.cuh
+++ b/cpp/include/cudf/detail/indexalator.cuh
@@ -93,7 +93,7 @@ struct input_indexalator : base_normalator<input_indexalator, cudf::size_type> {
    */
   __device__ inline cudf::size_type operator[](size_type idx) const
   {
-    void const* tp = p_ + (idx * this->width_);
+    void const* tp = p_ + (static_cast<std::ptrdiff_t>(idx) * this->width_);
     return type_dispatcher(this->dtype_, normalize_type{}, tp);
   }
 
@@ -109,7 +109,7 @@ struct input_indexalator : base_normalator<input_indexalator, cudf::size_type> {
   CUDF_HOST_DEVICE input_indexalator(void const* data, data_type dtype, cudf::size_type offset = 0)
     : base_normalator<input_indexalator, cudf::size_type>(dtype), p_{static_cast<char const*>(data)}
   {
-    p_ += offset * this->width_;
+    p_ += static_cast<std::ptrdiff_t>(offset) * this->width_;
   }
 
  protected:
@@ -165,7 +165,7 @@ struct output_indexalator : base_normalator<output_indexalator, cudf::size_type>
   __device__ inline output_indexalator const operator[](size_type idx) const
   {
     output_indexalator tmp{*this};
-    tmp.p_ += (idx * this->width_);
+    tmp.p_ += static_cast<std::ptrdiff_t>(idx) * this->width_;
     return tmp;
   }
 
diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh
index 5007af7f9f1..d31ca3d92d1 100644
--- a/cpp/include/cudf/detail/utilities/cuda.cuh
+++ b/cpp/include/cudf/detail/utilities/cuda.cuh
@@ -189,35 +189,6 @@ __device__ T single_lane_block_sum_reduce(T lane_value)
   return result;
 }
 
-/**
- * @brief Get the number of elements that can be processed per thread.
- *
- * @param[in] kernel The kernel for which the elements per thread needs to be assessed
- * @param[in] total_size Number of elements
- * @param[in] block_size Expected block size
- *
- * @return cudf::size_type Elements per thread that can be processed for given specification.
- */
-template <typename Kernel>
-cudf::size_type elements_per_thread(Kernel kernel,
-                                    cudf::size_type total_size,
-                                    cudf::size_type block_size,
-                                    cudf::size_type max_per_thread = 32)
-{
-  CUDF_FUNC_RANGE();
-
-  // calculate theoretical occupancy
-  int max_blocks = 0;
-  CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, kernel, block_size, 0));
-
-  int device = 0;
-  CUDF_CUDA_TRY(cudaGetDevice(&device));
-  int num_sms = 0;
-  CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device));
-  int per_thread = total_size / (max_blocks * num_sms * block_size);
-  return std::clamp(per_thread, 1, max_per_thread);
-}
-
 /**
  * @brief Finds the smallest value not less than `number_to_round` and modulo `modulus` is
  * zero. Expects modulus to be a power of 2.
diff --git a/cpp/include/cudf/detail/utilities/cuda.hpp b/cpp/include/cudf/detail/utilities/cuda.hpp
new file mode 100644
index 00000000000..58c7ae8ed6a
--- /dev/null
+++ b/cpp/include/cudf/detail/utilities/cuda.hpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/error.hpp>
+
+#include <algorithm>
+#include <cstdint>
+
+namespace CUDF_EXPORT cudf {
+namespace detail {
+
+/**
+ * @brief Get the number of multiprocessors on the device
+ *
+ * @return Multiprocessor count reported for the current CUDA device
+ */
+cudf::size_type num_multiprocessors();
+
+/**
+ * @brief Get the number of elements that can be processed per thread.
+ *
+ * The result is `total_size` divided by the product of the occupancy-limited blocks per
+ * multiprocessor, the multiprocessor count and `block_size`, clamped to `[1, max_per_thread]`.
+ * If that product is not positive, 1 is returned.
+ *
+ * @param[in] kernel The kernel for which the elements per thread needs to be assessed
+ * @param[in] total_size Number of elements
+ * @param[in] block_size Expected block size
+ * @param[in] max_per_thread Upper bound on the returned value
+ *
+ * @return cudf::size_type Elements per thread that can be processed for given specification.
+ */
+template <typename Kernel>
+cudf::size_type elements_per_thread(Kernel kernel,
+                                    cudf::size_type total_size,
+                                    cudf::size_type block_size,
+                                    cudf::size_type max_per_thread = 32)
+{
+  CUDF_FUNC_RANGE();
+
+  // calculate theoretical occupancy
+  int max_blocks = 0;
+  CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, kernel, block_size, 0));
+
+  // Widen before multiplying so the three-way product cannot overflow a 32-bit int.
+  auto const resident_threads =
+    static_cast<std::int64_t>(max_blocks) * num_multiprocessors() * block_size;
+
+  // A non-positive product (zero occupancy or a zero block size) would make the division
+  // below divide by zero; report the lower clamp bound instead.
+  if (resident_threads <= 0) { return 1; }
+
+  int per_thread = static_cast<int>(total_size / resident_threads);
+  return std::clamp(per_thread, 1, max_per_thread);
+}
+
+}  // namespace detail
+}  // namespace CUDF_EXPORT cudf
diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu
index 861820f47e7..72649dbe427 100644
--- a/cpp/src/io/comp/debrotli.cu
+++ b/cpp/src/io/comp/debrotli.cu
@@ -58,6 +58,7 @@ THE SOFTWARE.
 #include "gpuinflate.hpp"
 #include "io/utilities/block_utils.cuh"
 
+#include <cudf/detail/utilities/cuda.hpp>
 #include <cudf/utilities/error.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -2047,19 +2048,14 @@ CUDF_KERNEL void __launch_bounds__(block_size, 2)
  */
 size_t __host__ get_gpu_debrotli_scratch_size(int max_num_inputs)
 {
-  int sm_count = 0;
-  int dev      = 0;
   uint32_t max_fb_size, min_fb_size, fb_size;
-  CUDF_CUDA_TRY(cudaGetDevice(&dev));
-  if (cudaSuccess == cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev)) {
-    // printf("%d SMs on device %d\n", sm_count, dev);
-    max_num_inputs =
-      min(max_num_inputs, sm_count * 3);  // no more than 3 blocks/sm at most due to 32KB smem use
-    if (max_num_inputs <= 0) {
-      max_num_inputs = sm_count * 2;  // Target 2 blocks/SM by default for scratch mem computation
-    }
+  auto const sm_count = cudf::detail::num_multiprocessors();
+  // no more than 3 blocks/sm at most due to 32KB smem use
+  max_num_inputs = std::min(max_num_inputs, sm_count * 3);
+  if (max_num_inputs <= 0) {
+    max_num_inputs = sm_count * 2;  // Target 2 blocks/SM by default for scratch mem computation
   }
-  max_num_inputs = min(max(max_num_inputs, 1), 512);
+  max_num_inputs = std::min(std::max(max_num_inputs, 1), 512);
   // Max fb size per block occurs if all huffman tables for all 3 group types fail local_alloc()
   // with num_htrees=256 (See HuffmanTreeGroupAlloc)
   max_fb_size = 256 * (630 + 1080 + 920) * 2;  // 1.3MB
diff --git a/cpp/src/utilities/cuda.cpp b/cpp/src/utilities/cuda.cpp
new file mode 100644
index 00000000000..53ca0608170
--- /dev/null
+++ b/cpp/src/utilities/cuda.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/detail/utilities/cuda.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/error.hpp>
+
+#include <cuda_runtime.h>
+
+namespace cudf::detail {
+
+cudf::size_type num_multiprocessors()
+{
+  int dev_id{0};  // receives the ordinal written by cudaGetDevice
+  CUDF_CUDA_TRY(cudaGetDevice(&dev_id));
+  int sm_count{0};  // receives the multiprocessor-count attribute queried for `dev_id`
+  CUDF_CUDA_TRY(cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev_id));
+  return sm_count;
+}
+
+}  // namespace cudf::detail
diff --git a/dependencies.yaml b/dependencies.yaml
index 194577817db..04b5940c9fb 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -584,10 +584,6 @@ dependencies:
     specific:
       - output_types: conda
         matrices:
-          - matrix:
-              py: "3.9"
-            packages:
-              - python=3.9
           - matrix:
               py: "3.10"
             packages:
@@ -598,7 +594,7 @@ dependencies:
               - python=3.11
           - matrix:
             packages:
-              - python>=3.9,<3.12
+              - python>=3.10,<3.12
   run_common:
     common:
       - output_types: [conda, requirements, pyproject]
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index e7bac17f8ba..a6d26d17d46 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -16,7 +16,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "cachetools",
     "cubinlinker",
@@ -42,7 +42,6 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml
index 2d0222a3fe9..01e7299a33a 100644
--- a/python/cudf_kafka/pyproject.toml
+++ b/python/cudf_kafka/pyproject.toml
@@ -16,7 +16,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "cudf==24.10.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py
index 7c28e7b9a6c..a5c99e2bc11 100644
--- a/python/cudf_polars/cudf_polars/containers/dataframe.py
+++ b/python/cudf_polars/cudf_polars/containers/dataframe.py
@@ -105,7 +105,9 @@ def from_polars(cls, df: pl.DataFrame) -> Self:
         return cls(
             [
                 NamedColumn(column, h_col.name).copy_metadata(h_col)
-                for column, h_col in zip(d_table.columns(), df.iter_columns())
+                for column, h_col in zip(
+                    d_table.columns(), df.iter_columns(), strict=True
+                )
             ]
         )
 
@@ -134,8 +136,10 @@ def from_table(cls, table: plc.Table, names: Sequence[str]) -> Self:
         if table.num_columns() != len(names):
             raise ValueError("Mismatching name and table length.")
         return cls(
-            # TODO: strict=True when we drop py39
-            [NamedColumn(c, name) for c, name in zip(table.columns(), names)]
+            [
+                NamedColumn(c, name)
+                for c, name in zip(table.columns(), names, strict=True)
+            ]
         )
 
     def sorted_like(
@@ -165,8 +169,7 @@ def sorted_like(
         subset = self.column_names_set if subset is None else subset
         self.columns = [
             c.sorted_like(other) if c.name in subset else c
-            # TODO: strict=True when we drop py39
-            for c, other in zip(self.columns, like.columns)
+            for c, other in zip(self.columns, like.columns, strict=True)
         ]
         return self
 
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index 019f00f4fca..ebc7dee6bfb 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -310,7 +310,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                 *(
                     (piece.tbl, piece.column_names(include_children=False))
                     for piece in pieces
-                )
+                ),
+                strict=True,
             )
             df = DataFrame.from_table(
                 plc.concatenate.concatenate(list(tables)),
@@ -426,7 +427,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             pdf = pdf.select(self.projection)
         df = DataFrame.from_polars(pdf)
         assert all(
-            c.obj.type() == dtype for c, dtype in zip(df.columns, self.schema.values())
+            c.obj.type() == dtype
+            for c, dtype in zip(df.columns, self.schema.values(), strict=True)
         )
         if self.predicate is not None:
             (mask,) = broadcast(self.predicate.evaluate(df), target_length=df.num_rows)
@@ -600,9 +602,10 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         for i, table in enumerate(raw_tables):
             (column,) = table.columns()
             raw_columns.append(NamedColumn(column, f"tmp{i}"))
-        mapping = dict(zip(replacements, raw_columns))
+        mapping = dict(zip(replacements, raw_columns, strict=True))
         result_keys = [
-            NamedColumn(gk, k.name) for gk, k in zip(group_keys.columns(), keys)
+            NamedColumn(gk, k.name)
+            for gk, k in zip(group_keys.columns(), keys, strict=True)
         ]
         result_subs = DataFrame(raw_columns)
         results = [
@@ -752,7 +755,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             columns = plc.join.cross_join(left.table, right.table).columns()
             left_cols = [
                 NamedColumn(new, old.name).sorted_like(old)
-                for new, old in zip(columns[: left.num_columns], left.columns)
+                for new, old in zip(
+                    columns[: left.num_columns], left.columns, strict=True
+                )
             ]
             right_cols = [
                 NamedColumn(
@@ -761,7 +766,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                     if old.name not in left.column_names_set
                     else f"{old.name}{suffix}",
                 )
-                for new, old in zip(columns[left.num_columns :], right.columns)
+                for new, old in zip(
+                    columns[left.num_columns :], right.columns, strict=True
+                )
             ]
             return DataFrame([*left_cols, *right_cols])
         # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184
@@ -803,6 +810,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                         for left_col, right_col in zip(
                             left.select_columns(left_on.column_names_set),
                             right.select_columns(right_on.column_names_set),
+                            strict=True,
                         )
                     )
                 )
@@ -909,7 +917,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         result = DataFrame(
             [
                 NamedColumn(c, old.name).sorted_like(old)
-                for c, old in zip(table.columns(), df.columns)
+                for c, old in zip(table.columns(), df.columns, strict=True)
             ]
         )
         if keys_sorted or self.stable:
@@ -974,7 +982,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             self.null_order,
         )
         columns = [
-            NamedColumn(c, old.name) for c, old in zip(table.columns(), df.columns)
+            NamedColumn(c, old.name)
+            for c, old in zip(table.columns(), df.columns, strict=True)
         ]
         # If a sort key is in the result table, set the sortedness property
         for k, i in enumerate(keys_in_result):
@@ -1089,7 +1098,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             # final tag is "swapping" which is useful for the
             # optimiser (it blocks some pushdown operations)
             old, new, _ = self.options
-            return df.rename_columns(dict(zip(old, new)))
+            return df.rename_columns(dict(zip(old, new, strict=True)))
         elif self.name == "explode":
             df = self.df.evaluate(cache=cache)
             ((to_explode,),) = self.options
diff --git a/python/cudf_polars/cudf_polars/typing/__init__.py b/python/cudf_polars/cudf_polars/typing/__init__.py
index 02440e67fde..5276073e62a 100644
--- a/python/cudf_polars/cudf_polars/typing/__init__.py
+++ b/python/cudf_polars/cudf_polars/typing/__init__.py
@@ -13,9 +13,7 @@
 from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir
 
 if TYPE_CHECKING:
-    from typing import Callable
-
-    from typing_extensions import TypeAlias
+    from typing import Callable, TypeAlias
 
     import polars as pl
 
diff --git a/python/cudf_polars/cudf_polars/utils/sorting.py b/python/cudf_polars/cudf_polars/utils/sorting.py
index 17ea44e5b1b..6ce216cbf8f 100644
--- a/python/cudf_polars/cudf_polars/utils/sorting.py
+++ b/python/cudf_polars/cudf_polars/utils/sorting.py
@@ -45,7 +45,7 @@ def sort_order(
     null_precedence = []
     if len(descending) != len(nulls_last) or len(descending) != num_keys:
         raise ValueError("Mismatching length of arguments in sort_order")
-    for asc, null_last in zip(column_order, nulls_last):
+    for asc, null_last in zip(column_order, nulls_last, strict=True):
         if (asc == plc.types.Order.ASCENDING) ^ (not null_last):
             null_precedence.append(plc.types.NullOrder.AFTER)
         elif (asc == plc.types.Order.ASCENDING) ^ null_last:
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index c380853035d..0382e3ce6a2 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -17,7 +17,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "polars>=1.0,<1.3",
     "pylibcudf==24.10.*,>=0.0.0a0",
@@ -28,7 +28,6 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
@@ -62,7 +61,7 @@ exclude_also = [
 [tool.ruff]
 line-length = 88
 indent-width = 4
-target-version = "py39"
+target-version = "py310"
 fix = true
 
 [tool.ruff.lint]
@@ -115,6 +114,9 @@ ignore = [
   "TD003", # Missing issue link on the line following this TODO
   # tryceratops
   "TRY003", # Avoid specifying long messages outside the exception class
+  # pyupgrade
+  "UP035",  # Import from `collections.abc` instead: `Callable`
+  "UP038",  # Use `X | Y` in `isinstance` call instead of `(X, Y)`
   # Lints below are turned off because of conflicts with the ruff
   # formatter
   # See https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
@@ -137,6 +139,10 @@ fixable = ["ALL"]
 
 [tool.ruff.lint.per-file-ignores]
 "**/tests/**/*.py" = ["D"]
+"**/cudf_polars/typing/__init__.py" = [
+  # pyupgrade
+  "UP007", # Use `X | Y` for type annotations
+]
 
 [tool.ruff.lint.flake8-pytest-style]
 # https://docs.astral.sh/ruff/settings/#lintflake8-pytest-style
diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml
index d6b88167262..be5331236a5 100644
--- a/python/custreamz/pyproject.toml
+++ b/python/custreamz/pyproject.toml
@@ -17,7 +17,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "confluent-kafka>=1.9.0,<1.10.0a0",
     "cudf==24.10.*,>=0.0.0a0",
@@ -31,7 +31,6 @@ classifiers = [
     "Topic :: Apache Kafka",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml
index d5da7030a75..93bf532d67f 100644
--- a/python/dask_cudf/pyproject.toml
+++ b/python/dask_cudf/pyproject.toml
@@ -17,7 +17,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "cudf==24.10.*,>=0.0.0a0",
     "cupy-cuda11x>=12.0.0",
@@ -32,7 +32,6 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml
index 5f5594b462b..0d673ea4cc3 100644
--- a/python/pylibcudf/pyproject.toml
+++ b/python/pylibcudf/pyproject.toml
@@ -16,7 +16,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "cuda-python>=11.7.1,<12.0a0",
     "libcudf==24.10.*,>=0.0.0a0",
@@ -32,7 +32,6 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]