diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 6db282a7728..7f3edfa0a01 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -325,7 +325,6 @@ ConfigureBench(
   string/filter.cpp
   string/repeat_strings.cpp
   string/replace.cpp
-  string/slice.cpp
   string/translate.cpp
   string/url_decode.cu
 )
@@ -346,6 +345,7 @@ ConfigureNVBench(
   string/like.cpp
   string/replace_re.cpp
   string/reverse.cpp
+  string/slice.cpp
   string/split.cpp
   string/split_re.cpp
 )
diff --git a/cpp/benchmarks/string/slice.cpp b/cpp/benchmarks/string/slice.cpp
index 0f973a7c8b5..1898f0340b6 100644
--- a/cpp/benchmarks/string/slice.cpp
+++ b/cpp/benchmarks/string/slice.cpp
@@ -14,11 +14,8 @@
  * limitations under the License.
  */
 
-#include "string_bench_args.hpp"
-
 #include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
+#include <benchmarks/common/nvbench_utilities.hpp>
 
 #include <cudf_test/column_wrapper.hpp>
 
@@ -29,56 +26,63 @@
 
 #include <thrust/iterator/constant_iterator.h>
 
+#include <nvbench/nvbench.cuh>
+
 #include <limits>
 
-class StringSlice : public cudf::benchmark {};
+/**
+ * @brief Benchmarks cudf::strings::slice_strings over a randomly generated strings column
+ *
+ * The "type" axis selects the overload: "multi" passes per-row start/stop columns,
+ * any other value passes scalar start/stop positions with a step of 1.
+ */
+static void bench_slice(nvbench::state& state)
+{
+  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
+  auto const stype     = state.get_string("type");
 
-enum slice_type { position, multi_position };
+  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
+    return;  // skip() only records the reason; leave before building the oversized input
+  }
 
-static void BM_slice(benchmark::State& state, slice_type rt)
-{
-  cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))};
-  cudf::size_type const max_str_length{static_cast<cudf::size_type>(state.range(1))};
   data_profile const profile = data_profile_builder().distribution(
-    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
-  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+  auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
   cudf::strings_column_view input(column->view());
-  auto starts_itr = thrust::constant_iterator<cudf::size_type>(max_str_length / 3);
-  auto stops_itr  = thrust::constant_iterator<cudf::size_type>(max_str_length / 2);
-  cudf::test::fixed_width_column_wrapper<int32_t> starts(starts_itr, starts_itr + n_rows);
-  cudf::test::fixed_width_column_wrapper<int32_t> stops(stops_itr, stops_itr + n_rows);
+  auto starts_itr = thrust::constant_iterator<cudf::size_type>(row_width / 4);
+  auto starts =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>(starts_itr, starts_itr + num_rows);
+  auto stops_itr = thrust::constant_iterator<cudf::size_type>(row_width / 3);
+  auto stops =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>(stops_itr, stops_itr + num_rows);
 
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::get_default_stream());
-    switch (rt) {
-      case position:
-        cudf::strings::slice_strings(input, max_str_length / 3, max_str_length / 2);
-        break;
-      case multi_position: cudf::strings::slice_strings(input, starts, stops); break;
-    }
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  // gather some throughput statistics as well
+  auto chars_size = input.chars_size(stream);
+  state.add_element_count(chars_size, "chars_size");            // number of bytes
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);   // all bytes are read
+  auto output_size = (row_width / 3 - row_width / 4) * num_rows;
+  state.add_global_memory_writes<nvbench::int8_t>(output_size);
+
+  if (stype == "multi") {
+    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+      cudf::strings::slice_strings(input, starts, stops, stream);
+    });
+  } else {
+    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+      cudf::strings::slice_strings(input, row_width / 4, row_width / 3, 1, stream);
+    });
   }
 
-  state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream()));
+  set_throughputs(state);
 }
 
-static void generate_bench_args(benchmark::internal::Benchmark* b)
-{
-  int const min_rows   = 1 << 12;
-  int const max_rows   = 1 << 24;
-  int const row_mult   = 8;
-  int const min_rowlen = 1 << 5;
-  int const max_rowlen = 1 << 13;
-  int const len_mult   = 2;
-  generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult);
-}
-
-#define STRINGS_BENCHMARK_DEFINE(name)                          \
-  BENCHMARK_DEFINE_F(StringSlice, name)                         \
-  (::benchmark::State & st) { BM_slice(st, slice_type::name); } \
-  BENCHMARK_REGISTER_F(StringSlice, name)                       \
-    ->Apply(generate_bench_args)                                \
-    ->UseManualTime()                                           \
-    ->Unit(benchmark::kMillisecond);
-
-STRINGS_BENCHMARK_DEFINE(position)
-STRINGS_BENCHMARK_DEFINE(multi_position)
+NVBENCH_BENCH(bench_slice)
+  .set_name("slice")
+  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048})
+  .add_int64_axis("num_rows", {262144, 2097152, 16777216})
+  .add_string_axis("type", {"position", "multi"});