Skip to content
/ jmp Public

C++20 Static Branch library

Notifications You must be signed in to change notification settings

qlibs/jmp

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 
 
 
 
 

Repository files navigation

// Overview / Examples / API / FAQ

JMP: Static branch library

MIT Licence Version Build Try it online

https://en.wikipedia.org/wiki/Branch_(computer_science)

Use cases

Performance

  • branch is relatively stable through its life cycle and/or
  • branch is expensive to compute / require memory access and/or
  • branch is hard to learn by the branch predictor

Examples: logging, tracing, dispatching, fast path, devirtualization, ...

Features

Requirements


Overview

static_branch<bool> (https://godbolt.org/z/v8W3Pzbxd)

/**
 * constexpr minimal overhead static branch changed at run-time via code patching
 */
constexpr jmp::static_branch<bool> static_bool = false;

/**
 * Note: `fn` can be inline/noinline/constexpr/etc.
 */
void fn() {
  if (static_bool) { // Note: [[likely]], [[unlikely]] has no impact
    std::puts("taken");
  } else {
    std::puts("not taken");
  }
}

int main() {
  if (not jmp::init()) { // enables run-time code patching
    return errno;
  }

  fn(); // not taken

  static_bool = true;
  fn(); // taken
}
main: // $CXX -O3
# init
  xor eax, eax # return 0

# fn(); // not taken
  nop
.Ltmp0:
  mov edi, OFFSET FLAT:.LC0
  jmp puts
  ret
.Ltmp1:
  mov edi, OFFSET FLAT:.LC1
  jmp puts
  ret

# static_bool = true;
  call static_bool.operator=(true)

# fn(); // taken
  jmp .Ltmp1 // code patching (nop->jmp .Ltmp1)
.Ltmp0:
  mov edi, OFFSET FLAT:.LC0
  jmp puts
  ret
.Ltmp1:
  mov edi, OFFSET FLAT:.LC1
  jmp puts
  ret

.LC0: .asciz "not taken"
.LC1: .asciz "taken"

static_branch<bool> vs bool (https://godbolt.org/z/jvKGdPMWK)

constexpr jmp::static_branch<bool> static_bool = false;

void fn() {
  if (static_bool) {
    throw;
  } else {
    std::puts("else");
  }
}

fn():
  // static_bool = false;
  [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
   1      0     0.25                        nop
   1      1     0.33                        lea   rdi, [rip + .L.str]
   1      1     0.50                        jmp   puts
   1      1     1.00           *            .LBB0_1: push rax
   1      1     0.50                        call  __cxa_rethrow@PLT

  // static_bool = true;
  [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
   1      1     0.50                        jmp   .LBB0_1
   1      1     0.50                        lea   rdi, [rip + .L.str]
   1      1     0.50                        jmp   puts
   3      2     1.00           *            .LBB0_1: push rax
   4      3     1.00                        call  __cxa_rethrow@PLT

[1]: #uOps   [2]: Latency  [3]: RThroughput
[4]: MayLoad [5]: MayStore [6]: HasSideEffects (U)
bool dynamic_bool = false;

void fn() {
  if (dynamic_bool) {
    throw;
  } else {
    std::puts("else");
  }
}

fn():
  // dynamic_bool = false;
  [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
   2      5     0.50    *                   cmp   byte ptr [rip + dynamic_bool], 1
   1      1     0.25                        je    .LBB0_1
   1      1     0.25                        lea   rdi, [rip + .L.str]
   1      1     0.25                        jmp   puts
   1      1     0.50           *            .LBB0_1: push rax
   1      1     0.25                        call  __cxa_rethrow@PLT

  // dynamic_bool = true;
  [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
   2      5     0.50    *                   cmp   byte ptr [rip + dynamic_bool], 1
   1      1     0.25                        je    .LBB0_1
   1      1     0.25                        lea   rdi, [rip + .L.str]
   1      1     0.25                        jmp   puts
   1      1     0.50           *            .LBB0_1: push rax
   1      1     0.25                        call  __cxa_rethrow@PLT

[1]: #uOps   [2]: Latency  [3]: RThroughput
[4]: MayLoad [5]: MayStore [6]: HasSideEffects (U)

static_branch<T, T Min, T Max> (https://godbolt.org/z/Tz4ox7ncv)

constexpr jmp::static_branch<int, 0, 2> static_int = 0; // range: <0, 2>

void fn() {
  switch (static_int) {
    default: std::unreachable();
    case 0: std::puts("0"); return;
    case 1: std::puts("1"); return;
    case 2: std::puts("2"); return;
  }
}

int main() {
  if (not jmp::init()) { // enables run-time code patching
    return errno;
  }

  fn(); // 0

  static_int = 1;
  fn(); // 1

  static_int = 2;
  fn(); // 2
}
fn: // $CXX -O3 -fno-inline
  nop # code patching (nop->jmp .Ltmp1|.Ltmp2)
.Ltmp0:
  mov edi, OFFSET FLAT:.LC0
  jmp puts
  ret
.Ltmp1:
  mov edi, OFFSET FLAT:.LC1
  jmp puts
  ret
.Ltmp2:
  mov edi, OFFSET FLAT:.LC2
  jmp puts
  ret

main:
  // ... init

  fn() // 0

  call static_int.operator=(1)
  fn() // 1

  call static_int.operator=(2)
  fn() // 2

.LC0: .asciz "0"
.LC1: .asciz "1"
.LC2: .asciz "2"

Examples

variant (https://godbolt.org/z/TKPdYPv3P) | (https://wg21.link/P2996)

template<class... Ts>
class variant {
  static constexpr jmp::static_branch<std::size_t, 0, sizeof...(Ts)> index_ = 0u;

 public:
   constexpr variant() = default;

   template<class T> requires (not std::is_base_of_v<variant, std::remove_cvref_t<T>>)
   constexpr explicit(false) variant(T&& t) {
    constexpr auto index = [] {
      std::array match{std::is_same_v<Ts, std::remove_cvref_t<T>>...};
      return std::ranges::find(match, true) - match.begin();
    }();
    index_ = index;
    std::construct_at(&storage_.[:
      nonstatic_data_members_of(^storage)[index + 1u]
    :], std::forward<T>(t));
   }
   constexpr ~variant()
    requires (std::is_trivially_destructible_v<Ts> and ...) = default;

   template<class Fn>
   constexpr auto visit(Fn&& fn) const -> decltype(auto) {
    return [&]<auto I = 0u>(this auto&& self) {
      if constexpr (I == sizeof...(Ts)) {
        std::unreachable();
      } else {
        switch (index_) {
          default: return self.template operator()<I + 1u>();
          case I:  return std::invoke(std::forward<Fn>(fn), storage_.[:
                            nonstatic_data_members_of(^storage)[I + 1u]
                          :]);
        }
      }
    }();
  }

private:
  union storage;
  struct empty{ };
  static_assert(is_type(define_class(^storage, {
    std::meta::data_member_spec(^empty, {.name = "empty"}),
    std::meta::data_member_spec(^Ts)...
  })));
  storage storage_{.empty={}};
};
void usage(const variant<bool, int, float>& v) {
  v.visit(overload{
    [](bool)  { std::puts("bool");  },
    [](int)   { std::puts("int");   },
    [](float) { std::puts("float"); },
  });
}

int main() {
  if (not jmp::init()) { // enables run-time code patching
    return errno;
  }

  variant<bool, int, float> v{};

  v = true;
  usage(v);

  v = 42;
  usage(v);

  v = 42.f;
  usage(v);
}
usage(variant<bool, int, float> const&):
  nop # code patching (nop->jmp .Ltmp1|.Ltmp2)
.Ltmp0:
  mov edi, OFFSET FLAT:.LC0
  jmp puts
  ret
.Ltmp1:
  mov edi, OFFSET FLAT:.LC1
  jmp puts
  ret
.Ltmp2:
  mov edi, OFFSET FLAT:.LC2
  jmp puts
  ret

.LC0: .asciz  "bool"
.LC1: .asciz  "int"
.LC2: .asciz  "float"

Dispatching techniques (https://godbolt.org/z/cfKP9E8W9)

auto f1() -> int { return 42; }
auto f2() -> int { return 77; }
auto f3() -> int { return 99; }
auto if_else(bool b) -> int {                  # if_else(bool):
  if (b) {                                     #   testl   %edi, %edi
    return f1();                               #   movl    $42, %ecx
  } else {                                     #   movl    $77, %eax
    return f2();                               #   cmovnel %ecx, %eax # cmove or cmp
  }                                            #   retq
}                                              #

auto if_else_likely(bool b) -> int {           # if_else_likely(bool):
  if (b) [[likely]] {                          #   movl    $42, %eax # likely
    return f1();                               #   testl   %edi, %edi
  } else {                                     #   je      .LBB3_1
    return f2();                               #   retq
  }                                            # .LBB3_1:
}                                              #   movl    $77, %eax
                                               #   retq

auto ternary_op(bool b) -> int {               # ternary_op(bool):
  return b ? f1() : f2();                      #   testl   %edi, %edi
}                                              #   movl    $42, %ecx
                                               #   movl    $77, %eax
                                               #   cmovnel %ecx, %eax # often cmove
                                               #   retq

auto jump_table(bool b) -> int {               # jump_table(bool):
  static constexpr int (*dispatch[])(){        #  movl    %edi, %eax
    &f1, &f2                                   #  leaq    dispatch(%rip), %rcx
  };                                           #  jmpq    *(%rcx,%rax,8) # or call
  return dispatch[b]();                        #
}                                              # dispatch:
                                               #  .quad   f1()
                                               #  .quad   f2()

auto jump_table_musttail(bool b) -> int {      # jump_table_musttail(bool):
  static constexpr int (*dispatch[])(bool){    #   movl    %edi, %eax
    [](bool) { return f1(); },                 #   leaq    dispatch(%rip), %rcx
    [](bool) { return f2(); },                 #   jmpq    *(%rcx,%rax,8) # always jmp
  };                                           #
  [[clang::musttail]] return dispatch[b](b);   # dispatch:
}                                              #  .quad   f1::__invoke(bool)
                                               #  .quad   f2::__invoke(bool)

auto computed_goto(bool b) -> int {            # computed_goto(bool):
  static constexpr void* labels[]{&&L1, &&L2}; #   movl    %edi, %eax
  goto *labels[b];                             #   leaq    labels(%rip), %rcx
  L1: return f1();                             #   jmpq    *(%rcx,%rax,8)
  L2: return f2();                             # .Ltmp15:
}                                              #   movl    $42, %eax
                                               #   retq
                                               # .Ltmp17:
                                               #   movl    $77, %eax
                                               #   retq
                                               #
                                               # labels:
                                               #   .quad   .Ltmp15
                                               #   .quad   .Ltmp17

jmp::static_branch<bool> branch = false;       # jmp():
auto jmp() -> int {                            #   nop|jmp .Ltmp1 # code patching
  if (branch) {                                # .Ltmp0:
    return f1();                               #   movl    $42, %eax
  } else {                                     #   retq
    return f2();                               # .Ltmp1:
  }                                            #   movl    $77, %eax
}                                              #   retq
auto if_else(int i) -> int {                   # if_else(int):
  [[assume(i >= 0 and i <= 2)]];               #   cmpl    $1, %edi
  if (i == 0) {                                #   movl    $77, %eax
    return f1();                               #   movl    $99, %ecx
  } else if (i == 1) {                         #   cmovel  %eax, %ecx
    return f2();                               #   testl   %edi, %edi
  } else if (i == 2) {                         #   movl    $42, %eax
    return f3();                               #   cmovnel %ecx, %eax
  } else {                                     #   retq
    std::unreachable();
  }
}

auto switch_case(int i) -> int {               # switch_case(int):
  [[assume(i >= 0 and i <= 2)]];               #   movl    %edi, %eax
  switch (i) {                                 #   leaq    .Lswitch.table(%rip), %rcx
    default: std::unreachable();               #   movl    (%rcx,%rax,4), %eax
    case 0: return f1();                       #   retq
    case 1: return f2();                       # .Lswitch.table(int):
    case 2: return f3();                       #   .long   42
  }                                            #   .long   77
}                                              #   .long   99

auto jump_table(int i) -> int {                # jump_table(int):
  [[assume(i >= 0 and i <= 2)]];               #   movl    %edi, %eax
  static constexpr int (*dispatch[])(int){     #   leaq    dispatch(%rip), %rcx
    [](int) { return f1(); },                  #   jmpq    *(%rcx,%rax,8) # always jmp
    [](int) { return f2(); },                  # dispatch:
    [](int) { return f3(); },                  #  .quad   f1()
  };                                           #  .quad   f2()
  [[clang::musttail]] return dispatch[i](i);   #  .quad   f3()
}

auto computed_goto(int i) -> int {             # computed_goto(int):
  [[assume(i >= 0 and i <= 2)]];               #   movl    %edi, %eax
  static constexpr void* labels[]{             #   leaq    labels(%rip), %rcx
    &&L1, &&L2, &&L3                           #   jmpq    *(%rcx,%rax,8)
  };                                           # .Ltmp35:
  goto *labels[i];                             #   movl    $42, %eax
  L1: return f1();                             #   retq
  L2: return f2();                             # .Ltmp37:
  L3: return f3();                             #   movl    $99, %eax
}                                              #   retq
                                               # .Ltmp39:
                                               #   movl    $77, %eax
                                               #   retq
                                               #
                                               # labels:
                                               #   .quad   .Ltmp35
                                               #   .quad   .Ltmp37
                                               #   .quad   .Ltmp39

jmp::static_branch<int, 0, 2> branch = 0;      # jmp():
auto jmp() -> int {                            #   jmp .LBB21_0|.LBB21_1|.LBB21_2
  switch (branch) {                            # .LBB21_0:
    default: std::unreachable();               #   movl    $42, %eax
    case 0: return f1();                       #   retq
    case 1: return f2();                       # .LBB21_1:
    case 2: return f3();                       #   movl    $99, %eax
  }                                            #   retq
}                                              # .LBB21_2:
                                               #   movl    $77, %eax
                                               #   retq

Fast/Slow path (https://godbolt.org/z/qvar9ThK9)

[[gnu::always_inline]] inline void fast_path() { std::puts("fast_path"); }
[[gnu::cold]] void slow_path() { std::puts("slow_path"); }

constexpr jmp::static_branch<bool> disarmed = false;

void trigger() {
  if (not disarmed) { // { false: nop, true: jmp }
    fast_path();
  } else {
    slow_path();
  }
}
trigger(): // $CXX -O3
  nop                              # code patching (nop->jmp .Ltmp1)
 .Ltmp0:                           # fast path (inlined)
  mov edi, OFFSET FLAT:.LC1
  jmp puts
 .Ltmp1:                           # slow path (cold)
  jmp slow_path() # [clone .cold]

API

/**
 * Minimal overhead (via code patching) static branch
 */
template<class T, T...> struct static_branch;

template<> struct static_branch<bool> final {
  /**
   * static_assert(sizeof(static_branch<bool>) == 1u)
   * @param value initial branch value (false)
   */
  constexpr explicit(false) static_branch(const bool value) noexcept;
  constexpr static_branch(const static_branch&) noexcept = delete;
  constexpr static_branch(static_branch&&) noexcept = delete;
  constexpr static_branch& operator=(const static_branch&) noexcept = delete;
  constexpr static_branch& operator=(static_branch&&) noexcept = delete;

  /**
   * Updates branch value
   * @param value new branch value
   */
  constexpr const auto& operator=(const bool value) const noexcept;

  [[gnu::always_inline]] [[nodiscard]]
  inline explicit(false) operator bool() const noexcept;
};

template<class T, T Min, T Max>
  requires requires(T t) { reinterpret_cast<T>(t); } and
  (Max - Min >= 2 and Max - Min <= 7)
struct static_branch<T, Min, Max> final {
  /**
   * static_assert(sizeof(static_branch<bool>) == 1u)
   * @param value initial branch value (false)
   */
  constexpr explicit(false) static_branch(const T value) noexcept;
  constexpr static_branch(const static_branch&) noexcept = delete;
  constexpr static_branch(static_branch&&) noexcept = delete;
  constexpr static_branch& operator=(const static_branch&) noexcept = delete;
  constexpr static_branch& operator=(static_branch&&) noexcept = delete;

  /**
   * Updates branch value
   * @param value new branch value
   */
  constexpr const auto& operator=(const T value) const noexcept;

  [[gnu::always_inline]] [[nodiscard]]
  inline explicit(false) operator T() const noexcept;
};

FAQ