Merge branch 'branch-24.10' into apply-bmask-nvbench
davidwendt committed Aug 27, 2024
2 parents 7684ae9 + 115ddce commit eca523e
Showing 36 changed files with 661 additions and 616 deletions.
2 changes: 1 addition & 1 deletion ci/cudf_pandas_scripts/run_tests.sh
@@ -75,7 +75,7 @@ IFS=',' read -r -a versions <<< "$output"

for version in "${versions[@]}"; do
echo "Installing pandas version: ${version}"
python -m pip install "pandas==${version}"
python -m pip install "numpy>=1.23,<2.0a0" "pandas==${version}"
python -m pytest -p cudf.pandas \
--cov-config=./python/cudf/.coveragerc \
--cov=cudf \
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -57,7 +57,7 @@ dependencies:
- notebook
- numba>=0.57
- numpy
- numpy>=1.23,<2.0a0
- numpy>=1.23,<3.0a0
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==3.0.6
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -56,7 +56,7 @@ dependencies:
- notebook
- numba>=0.57
- numpy
- numpy>=1.23,<2.0a0
- numpy>=1.23,<3.0a0
- numpydoc
- nvcomp==3.0.6
- nvtx>=0.2.1
6 changes: 2 additions & 4 deletions conda/recipes/cudf/meta.yaml
@@ -64,8 +64,7 @@ requirements:
- rapids-build-backend >=0.3.0,<0.4.0.dev0
- scikit-build-core >=0.10.0
- dlpack >=0.8,<1.0
# TODO: Change to `2.0` for NumPy 2
- numpy 1.23
- numpy 2.0
- pyarrow ==16.1.0.*
- libcudf ={{ version }}
- pylibcudf ={{ version }}
@@ -84,8 +83,7 @@ requirements:
- pandas >=2.0,<2.2.3dev0
- cupy >=12.0.0
- numba >=0.57
# TODO: Update `numpy` in `host` when dropping `<2.0a0`
- numpy >=1.23,<2.0a0
- numpy >=1.23,<3.0a0
- {{ pin_compatible('pyarrow', max_pin='x.x') }}
- libcudf ={{ version }}
- pylibcudf ={{ version }}
6 changes: 2 additions & 4 deletions conda/recipes/pylibcudf/meta.yaml
@@ -64,8 +64,7 @@ requirements:
- rapids-build-backend >=0.3.0,<0.4.0.dev0
- scikit-build-core >=0.10.0
- dlpack >=0.8,<1.0
# TODO: Change to `2.0` for NumPy 2
- numpy 1.23
- numpy 2.0
- pyarrow ==16.1.0.*
- libcudf ={{ version }}
- rmm ={{ minor_version }}
@@ -81,8 +80,7 @@ requirements:
- python
- typing_extensions >=4.0.0
- pandas >=2.0,<2.2.3dev0
# TODO: Update `numpy` in `host` when dropping `<2.0a0`
- numpy >=1.23,<2.0a0
- numpy >=1.23,<3.0a0
- {{ pin_compatible('pyarrow', max_pin='x.x') }}
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
11 changes: 0 additions & 11 deletions cpp/CMakeLists.txt
@@ -1069,23 +1069,12 @@ if(CUDF_ENABLE_ARROW_PARQUET)
)
endif()

string(
APPEND
install_code_string
[=[
if(testing IN_LIST cudf_FIND_COMPONENTS)
enable_language(CUDA)
endif()
]=]
)

rapids_export(
INSTALL cudf
EXPORT_SET cudf-exports ${_components_export_string}
GLOBAL_TARGETS cudf cudftestutil
NAMESPACE cudf::
DOCUMENTATION doc_string
FINAL_CODE_BLOCK install_code_string
)

# ##################################################################################################
2 changes: 2 additions & 0 deletions cpp/benchmarks/reduction/minmax.cpp
@@ -47,6 +47,8 @@ static void reduction_minmax(nvbench::state& state, nvbench::type_list<DataType>
set_throughputs(state);
}

NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");

using Types = nvbench::type_list<bool, int8_t, int32_t, float, cudf::timestamp_ms>;

NVBENCH_BENCH_TYPES(reduction_minmax, NVBENCH_TYPE_AXES(Types))
2 changes: 2 additions & 0 deletions cpp/benchmarks/reduction/reduce.cpp
@@ -81,6 +81,8 @@ static void reduction(nvbench::state& state, nvbench::type_list<DataType, nvbenc
set_throughputs(state);
}

NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");

using Types = nvbench::type_list<int32_t, int64_t, double, cudf::timestamp_ms>;
using AggKinds = nvbench::enum_type_list<cudf::reduce_aggregation::MIN,
cudf::reduce_aggregation::SUM,
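Both benchmark changes above register a display string for cudf::timestamp_ms so that nvbench can label the new entry on the DataType type axis; without NVBENCH_DECLARE_TYPE_STRINGS, nvbench has no readable name for a non-fundamental type. A minimal standalone sketch of the same pattern follows; the benchmark body and the my_timestamp_ms placeholder type are illustrative assumptions, not part of this commit.

#include <nvbench/nvbench.cuh>

// Placeholder strong type standing in for cudf::timestamp_ms.
struct my_timestamp_ms {};

// Give nvbench a readable name (and description) for the custom type.
NVBENCH_DECLARE_TYPE_STRINGS(my_timestamp_ms, "my_timestamp_ms", "millisecond timestamp");

template <typename DataType>
static void noop_bench(nvbench::state& state, nvbench::type_list<DataType>)
{
  // A real benchmark would launch work here; an empty exec keeps the sketch runnable.
  state.exec([](nvbench::launch&) {});
}

using Types = nvbench::type_list<int32_t, my_timestamp_ms>;

NVBENCH_BENCH_TYPES(noop_bench, NVBENCH_TYPE_AXES(Types))
  .set_name("noop_bench")
  .set_type_axes_names({"DataType"});

// Link against nvbench::main (or add NVBENCH_MAIN) to obtain an executable.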
6 changes: 3 additions & 3 deletions cpp/include/cudf/detail/indexalator.cuh
@@ -93,7 +93,7 @@ struct input_indexalator : base_normalator<input_indexalator, cudf::size_type> {
*/
__device__ inline cudf::size_type operator[](size_type idx) const
{
void const* tp = p_ + (idx * this->width_);
void const* tp = p_ + (static_cast<std::ptrdiff_t>(idx) * this->width_);
return type_dispatcher(this->dtype_, normalize_type{}, tp);
}

@@ -109,7 +109,7 @@ struct input_indexalator : base_normalator<input_indexalator, cudf::size_type> {
CUDF_HOST_DEVICE input_indexalator(void const* data, data_type dtype, cudf::size_type offset = 0)
: base_normalator<input_indexalator, cudf::size_type>(dtype), p_{static_cast<char const*>(data)}
{
p_ += offset * this->width_;
p_ += static_cast<std::ptrdiff_t>(offset) * this->width_;
}

protected:
@@ -165,7 +165,7 @@ struct output_indexalator : base_normalator<output_indexalator, cudf::size_type>
__device__ inline output_indexalator const operator[](size_type idx) const
{
output_indexalator tmp{*this};
tmp.p_ += (idx * this->width_);
tmp.p_ += static_cast<std::ptrdiff_t>(idx) * this->width_;
return tmp;
}

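The indexalator changes above widen the row index to std::ptrdiff_t before multiplying by the element width, so the byte offset is computed in 64-bit rather than 32-bit arithmetic. A self-contained sketch of the overflow being guarded against (the numbers are illustrative, not from cudf):

#include <cstddef>
#include <cstdint>
#include <iostream>

int main()
{
  std::int32_t idx   = 600'000'000;  // a large but valid cudf::size_type row index
  std::int32_t width = 8;            // byte width of an 8-byte element

  // The 32-bit product exceeds 2^32 and wraps (shown with unsigned math so the
  // demonstration itself stays well defined; in signed math it is undefined behavior).
  std::uint32_t wrapped = static_cast<std::uint32_t>(idx) * static_cast<std::uint32_t>(width);

  // Widening the index first, as the diff now does, keeps the full byte offset.
  std::ptrdiff_t offset = static_cast<std::ptrdiff_t>(idx) * width;

  std::cout << "wrapped 32-bit offset: " << wrapped << '\n'
            << "correct 64-bit offset: " << offset << '\n';
  return 0;
}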
191 changes: 96 additions & 95 deletions cpp/src/join/mixed_join.cu
@@ -16,7 +16,8 @@

#include "join_common_utils.cuh"
#include "join_common_utils.hpp"
#include "mixed_join_kernels.cuh"
#include "mixed_join_kernel.hpp"
#include "mixed_join_size_kernel.hpp"

#include <cudf/ast/detail/expression_parser.hpp>
#include <cudf/ast/expressions.hpp>
@@ -178,9 +179,6 @@ mixed_join(
join_size = output_size_data->first;
matches_per_row_span = output_size_data->second;
} else {
// Allocate storage for the counter used to get the size of the join output
rmm::device_scalar<std::size_t> size(0, stream, mr);

matches_per_row =
rmm::device_uvector<size_type>{static_cast<std::size_t>(outer_num_rows), stream, mr};
// Note that the view goes out of scope after this else statement, but the
@@ -190,37 +188,38 @@
matches_per_row_span = cudf::device_span<size_type const>{
matches_per_row->begin(), static_cast<std::size_t>(outer_num_rows)};
if (has_nulls) {
compute_mixed_join_output_size<DEFAULT_JOIN_BLOCK_SIZE, true>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
size.data(),
mutable_matches_per_row_span);
join_size = launch_compute_mixed_join_output_size<true>(*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
mutable_matches_per_row_span,
config,
shmem_size_per_block,
stream,
mr);
} else {
compute_mixed_join_output_size<DEFAULT_JOIN_BLOCK_SIZE, false>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
size.data(),
mutable_matches_per_row_span);
join_size = launch_compute_mixed_join_output_size<false>(*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
mutable_matches_per_row_span,
config,
shmem_size_per_block,
stream,
mr);
}
join_size = size.value(stream);
}

// The initial early exit clauses guarantee that we will not reach this point
@@ -249,37 +248,39 @@ mixed_join(
auto const& join_output_r = right_indices->data();

if (has_nulls) {
mixed_join<DEFAULT_JOIN_BLOCK_SIZE, true>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
join_output_l,
join_output_r,
parser.device_expression_data,
join_result_offsets.data(),
swap_tables);
launch_mixed_join<true>(*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
join_output_l,
join_output_r,
parser.device_expression_data,
join_result_offsets.data(),
swap_tables,
config,
shmem_size_per_block,
stream);
} else {
mixed_join<DEFAULT_JOIN_BLOCK_SIZE, false>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
join_output_l,
join_output_r,
parser.device_expression_data,
join_result_offsets.data(),
swap_tables);
launch_mixed_join<false>(*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
kernel_join_type,
hash_table_view,
join_output_l,
join_output_r,
parser.device_expression_data,
join_result_offsets.data(),
swap_tables,
config,
shmem_size_per_block,
stream);
}

auto join_indices = std::pair(std::move(left_indices), std::move(right_indices));
@@ -423,9 +424,6 @@ compute_mixed_join_output_size(table_view const& left_equality,
detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE);
auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block;

// Allocate storage for the counter used to get the size of the join output
rmm::device_scalar<std::size_t> size(0, stream, mr);

auto const preprocessed_probe =
experimental::row::equality::preprocessed_table::create(probe, stream);
auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe};
@@ -436,39 +434,42 @@

// Determine number of output rows without actually building the output to simply
// find what the size of the output will be.
std::size_t size = 0;
if (has_nulls) {
compute_mixed_join_output_size<DEFAULT_JOIN_BLOCK_SIZE, true>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
size.data(),
matches_per_row_span);
size = launch_compute_mixed_join_output_size<true>(*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
matches_per_row_span,
config,
shmem_size_per_block,
stream,
mr);
} else {
compute_mixed_join_output_size<DEFAULT_JOIN_BLOCK_SIZE, false>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
size.data(),
matches_per_row_span);
size = launch_compute_mixed_join_output_size<false>(*left_conditional_view,
*right_conditional_view,
*probe_view,
*build_view,
hash_probe,
equality_probe,
join_type,
hash_table_view,
parser.device_expression_data,
swap_tables,
matches_per_row_span,
config,
shmem_size_per_block,
stream,
mr);
}

return {size.value(stream), std::move(matches_per_row)};
return {size, std::move(matches_per_row)};
}

} // namespace detail
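The mixed_join.cu changes replace the inline kernel launches with launch_mixed_join and launch_compute_mixed_join_output_size wrappers (declared in the new mixed_join_kernel.hpp and mixed_join_size_kernel.hpp headers); the size wrapper now owns the device-side counter and returns the join size directly, which is why the rmm::device_scalar allocations disappear from the callers. A generic, self-contained sketch of that launch-and-return-a-device-counter pattern is below; the kernel, names, and counting logic are illustrative assumptions, not the cudf implementation.

#include <cuda_runtime.h>
#include <cstddef>

// Toy kernel standing in for compute_mixed_join_output_size: each thread that
// finds a "match" bumps a single device-side counter.
__global__ void count_matches(int const* values, std::size_t n, unsigned long long* counter)
{
  auto const i = static_cast<std::size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (i < n && values[i] > 0) { atomicAdd(counter, 1ULL); }
}

// Wrapper in the spirit of launch_compute_mixed_join_output_size: it allocates
// the counter, launches the kernel on the caller's stream, and returns the
// host-side value, so callers never see the device scalar.
// (Uses the stream-ordered allocator, available since CUDA 11.2.)
std::size_t launch_count_matches(int const* d_values, std::size_t n, cudaStream_t stream)
{
  unsigned long long* d_counter = nullptr;
  cudaMallocAsync(reinterpret_cast<void**>(&d_counter), sizeof(unsigned long long), stream);
  cudaMemsetAsync(d_counter, 0, sizeof(unsigned long long), stream);

  int const block_size = 256;
  int const num_blocks = static_cast<int>((n + block_size - 1) / block_size);
  count_matches<<<num_blocks, block_size, 0, stream>>>(d_values, n, d_counter);

  unsigned long long h_counter = 0;
  cudaMemcpyAsync(&h_counter, d_counter, sizeof(unsigned long long), cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);  // the copy must finish before reading h_counter
  cudaFreeAsync(d_counter, stream);
  return static_cast<std::size_t>(h_counter);
}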
