From c391dd633e5c0266a31ac119e4fc8423e437e38b Mon Sep 17 00:00:00 2001
From: Nikola Samardzic
Date: Thu, 15 Jun 2023 20:32:58 +0000
Subject: [PATCH] [`bench.cpp`] Add benchmarks for `mul_avx`, `add_avx`, and `mul33c_avx`

---
 benchs/bench.cpp | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/benchs/bench.cpp b/benchs/bench.cpp
index c84f32b..2cc1ab3 100644
--- a/benchs/bench.cpp
+++ b/benchs/bench.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 
+#include "../src/goldilocks_cubic_extension.hpp"
 #include "../src/goldilocks_base_field.hpp"
 #include "../src/poseidon_goldilocks.hpp"
 #include "../src/poseidon_goldilocks_avx.hpp"
@@ -23,6 +24,50 @@
 #define NPHASES_LDE 2
 #define NBLOCKS 1
 
+// Empty asm statement taking `p` as an input with a "memory" clobber, so the compiler must assume *p is observed.
+static void escape(void *p) {
+    asm volatile("" : : "g"(p): "memory");
+}
+
+// Times Goldilocks::add_avx on two __m256i operands, accumulating into a_ on every iteration.
+static void BENCH_ADD_AVX(benchmark::State &state) {
+    __m256i a_ = _mm256_set1_epi64x(9);  // initialized: reading an indeterminate __m256i is undefined behavior
+    __m256i b_ = _mm256_set1_epi64x(10); // same 9 / 10 operands as BENCH_MUL_AVX
+
+    // Benchmark
+    for (auto _ : state)
+    {
+        Goldilocks::add_avx(a_, a_, b_);
+        escape(&a_);
+    }
+}
+
+// Times Goldilocks::mul_avx called with pointers to Elements, accumulating into a_ on every iteration.
+static void BENCH_MUL_AVX(benchmark::State &state) {
+    Goldilocks::Element a_ = Goldilocks::fromU64(9);
+    Goldilocks::Element b_ = Goldilocks::fromU64(10);
+
+    // Benchmark
+    for (auto _ : state)
+    {
+        Goldilocks::mul_avx(&a_, &a_, &b_); // NOTE(review): confirm this overload touches only one Element per pointer
+        escape(&a_);
+    }
+}
+
+// Times Goldilocks3::mul33c_avx on two 12-Element arrays, accumulating into a_ on every iteration.
+static void BENCH_MUL33C_AVX(benchmark::State &state) {
+    Goldilocks::Element a_[12] = {}; // value-initialized: the kernel must not read indeterminate Elements
+    Goldilocks::Element b_[12] = {};
+
+    // Benchmark
+    for (auto _ : state)
+    {
+        Goldilocks3::mul33c_avx(a_, a_, b_);
+        escape(a_);
+    }
+}
+
 static void POSEIDON_BENCH_FULL(benchmark::State &state)
 {
     uint64_t input_size = (uint64_t)NUM_HASHES * (uint64_t)SPONGE_WIDTH;
@@ -886,6 +931,18 @@ static void EXTENDEDPOL_BENCH(benchmark::State &state)
     free(c);
 }
 
+BENCHMARK(BENCH_ADD_AVX)
+    ->Unit(benchmark::kNanosecond)
+    ->UseRealTime();
+
+BENCHMARK(BENCH_MUL_AVX)
+    ->Unit(benchmark::kNanosecond)
+    ->UseRealTime();
+
+BENCHMARK(BENCH_MUL33C_AVX)
+    ->Unit(benchmark::kNanosecond)
+    ->UseRealTime();
+
 BENCHMARK(POSEIDON_BENCH_FULL)
     ->Unit(benchmark::kMicrosecond)
->DenseRange(omp_get_max_threads() / 2, omp_get_max_threads(), omp_get_max_threads() / 2)