diff --git a/cpp/benchmarks/decimal/convert_floating.cpp b/cpp/benchmarks/decimal/convert_floating.cpp index a367036c494..ac09c3400cb 100644 --- a/cpp/benchmarks/decimal/convert_floating.cpp +++ b/cpp/benchmarks/decimal/convert_floating.cpp @@ -32,8 +32,6 @@ void bench_cast_decimal(nvbench::state& state, nvbench::type_list || std::is_same_v; - static constexpr bool is_32bit = - std::is_same_v || std::is_same_v; static constexpr bool is_128bit = std::is_same_v || std::is_same_v; @@ -69,21 +67,6 @@ void bench_cast_decimal(nvbench::state& state, nvbench::type_list decimal conversion algorithm is limited - static constexpr bool is_64bit = !is_32bit && !is_128bit; - if (is_32bit && (exp_mode != 3)) { - state.skip("Decimal32 conversion only works up to scale factors of 10^9."); - return; - } - if (is_64bit && ((exp_mode < 2) || (exp_mode > 4))) { - state.skip("Decimal64 conversion only works up to scale factors of 10^18."); - return; - } - if (is_128bit && ((exp_mode == 0) || (exp_mode == 6))) { - state.skip("Decimal128 conversion only works up to scale factors of 10^38."); - return; - } - // Type IDs auto const input_id = cudf::type_to_id(); auto const output_id = cudf::type_to_id(); diff --git a/cpp/include/cudf/fixed_point/floating_conversion.hpp b/cpp/include/cudf/fixed_point/floating_conversion.hpp index 2c3a5c5629d..c64ae8877d4 100644 --- a/cpp/include/cudf/fixed_point/floating_conversion.hpp +++ b/cpp/include/cudf/fixed_point/floating_conversion.hpp @@ -18,6 +18,7 @@ #include +#include #include #include @@ -34,6 +35,49 @@ namespace numeric { namespace detail { +/** + * @brief Determine the number of significant bits in an integer + * + * @tparam T Type of input integer value. Must be either uint32_t, uint64_t, or __uint128_t + * @param value The integer whose bits are being counted + * @return The number of significant bits: the # of bits - # of leading zeroes + */ +template || std::is_same_v || + std::is_same_v)> +CUDF_HOST_DEVICE inline int count_significant_bits(T value) +{ +#ifdef __CUDA_ARCH__ + if constexpr (std::is_same_v) { + return 64 - __clzll(static_cast(value)); + } else if constexpr (std::is_same_v) { + return 32 - __clz(static_cast(value)); + } else if constexpr (std::is_same_v) { + // 128 bit type, must break up into high and low components + auto const high_bits = static_cast(value >> 64); + auto const low_bits = static_cast(value); + return 128 - (__clzll(high_bits) + static_cast(high_bits == 0) * __clzll(low_bits)); + } +#else + // Undefined behavior to call __builtin_clzll() with zero in gcc and clang + if (value == 0) { return 0; } + + if constexpr (std::is_same_v) { + return 64 - __builtin_clzll(value); + } else if constexpr (std::is_same_v) { + return 32 - __builtin_clz(value); + } else if constexpr (std::is_same_v) { + // 128 bit type, must break up into high and low components + auto const high_bits = static_cast(value >> 64); + if (high_bits == 0) { + return 64 - __builtin_clzll(static_cast(value)); + } else { + return 128 - __builtin_clzll(high_bits); + } + } +#endif +} + /** * @brief Helper struct for getting and setting the components of a floating-point value * @@ -62,27 +106,28 @@ struct floating_converter { // The low 23 / 52 bits (for float / double) are the mantissa. // The mantissa is normalized. There is an understood 1 bit to the left of the binary point. // The value of the mantissa is in the range [1, 2). - /// # mantissa bits (-1 for understood bit) - static constexpr int num_mantissa_bits = cuda::std::numeric_limits::digits - 1; + /// # significand bits (includes understood bit) + static constexpr int num_significand_bits = cuda::std::numeric_limits::digits; + /// # stored mantissa bits (-1 for understood bit) + static constexpr int num_stored_mantissa_bits = num_significand_bits - 1; /// The mask for the understood bit - static constexpr IntegralType understood_bit_mask = (IntegralType(1) << num_mantissa_bits); + static constexpr IntegralType understood_bit_mask = (IntegralType(1) << num_stored_mantissa_bits); /// The mask to select the mantissa static constexpr IntegralType mantissa_mask = understood_bit_mask - 1; // And in between are the bits used to store the biased power-of-2 exponent. /// # exponents bits (-1 for sign bit) - static constexpr int num_exponent_bits = num_floating_bits - num_mantissa_bits - 1; + static constexpr int num_exponent_bits = num_floating_bits - num_stored_mantissa_bits - 1; /// The mask for the exponents, unshifted static constexpr IntegralType unshifted_exponent_mask = (IntegralType(1) << num_exponent_bits) - 1; /// The mask to select the exponents - static constexpr IntegralType exponent_mask = unshifted_exponent_mask << num_mantissa_bits; + static constexpr IntegralType exponent_mask = unshifted_exponent_mask << num_stored_mantissa_bits; // To store positive and negative exponents as unsigned values, the stored value for // the power-of-2 is exponent + bias. The bias is 127 for floats and 1023 for doubles. /// 127 / 1023 for float / double - static constexpr IntegralType exponent_bias = - cuda::std::numeric_limits::max_exponent - 1; + static constexpr int exponent_bias = cuda::std::numeric_limits::max_exponent - 1; /** * @brief Reinterpret the bits of a floating-point value as an integer @@ -113,15 +158,15 @@ struct floating_converter { } /** - * @brief Extracts the integral significand of a bit-casted floating-point number + * @brief Checks whether the bit-casted floating-point value is +/-0 * - * @param integer_rep The bit-casted floating value to extract the exponent from - * @return The integral significand, bit-shifted to a (large) whole number + * @param integer_rep The bit-casted floating value to check if is +/-0 + * @return True if is a zero, else false */ - CUDF_HOST_DEVICE inline static IntegralType get_base2_value(IntegralType integer_rep) + CUDF_HOST_DEVICE inline static bool is_zero(IntegralType integer_rep) { - // Extract the significand, setting the high bit for the understood 1/2 - return (integer_rep & mantissa_mask) | understood_bit_mask; + // It's a zero if every non-sign bit is zero + return ((integer_rep & ~sign_mask) == 0); } /** @@ -137,40 +182,59 @@ struct floating_converter { } /** - * @brief Extracts the exponent of a bit-casted floating-point number + * @brief Extracts the significand and exponent of a bit-casted floating-point number, + * shifted for denormals. * - * @note This returns INT_MIN for +/-0, +/-inf, NaN's, and denormals - * For all of these cases, the decimal fixed_point number should be set to zero + * @note Zeros/inf/NaN not handled. * * @param integer_rep The bit-casted floating value to extract the exponent from - * @return The stored base-2 exponent, or INT_MIN for special values + * @return The stored base-2 exponent and significand, shifted for denormals */ - CUDF_HOST_DEVICE inline static int get_exp2(IntegralType integer_rep) + CUDF_HOST_DEVICE inline static std::pair get_significand_and_pow2( + IntegralType integer_rep) { - // First extract the exponent bits and handle its special values. - // To minimize branching, all of these special cases will return INT_MIN. - // For all of these cases, the decimal fixed_point number should be set to zero. + // Extract the significand + auto significand = (integer_rep & mantissa_mask); + + // Extract the exponent bits. auto const exponent_bits = integer_rep & exponent_mask; + + // Notes on special values of exponent_bits: + // bits = exponent_mask is +/-inf or NaN, but those are handled prior to input. + // bits = 0 is either a denormal (handled below) or a zero (handled earlier by caller). + int floating_pow2; if (exponent_bits == 0) { - // Because of the understood set-bit not stored in the mantissa, it is not possible - // to store the value zero directly. Instead both +/-0 and denormals are represented with - // the exponent bits set to zero. - // Thus it's fastest to just floor (generally unwanted) denormals to zero. - return INT_MIN; - } else if (exponent_bits == exponent_mask) { - //+/-inf and NaN values are stored with all of the exponent bits set. - // As none of these are representable by integers, we'll return the same value for all cases. - return INT_MIN; + // Denormal values are 2^(1 - exponent_bias) * Sum_i(B_i * 2^-i) + // Where i is the i-th mantissa bit (counting from the LEFT, starting at 1), + // and B_i is the value of that bit (0 or 1) + // So e.g. for the minimum denormal, only the lowest bit is set: + // FLT_TRUE_MIN = 2^(1 - 127) * 2^-23 = 2^-149 + // DBL_TRUE_MIN = 2^(1 - 1023) * 2^-52 = 2^-1074 + floating_pow2 = 1 - exponent_bias; + + // Line-up denormal to same (understood) bit as normal numbers + // This is so bit-shifting starts at the same bit index + auto const lineup_shift = num_significand_bits - count_significant_bits(significand); + significand <<= lineup_shift; + floating_pow2 -= lineup_shift; + } else { + // Extract the exponent value: shift the bits down and subtract the bias. + auto const shifted_exponent_bits = exponent_bits >> num_stored_mantissa_bits; + floating_pow2 = static_cast(shifted_exponent_bits) - exponent_bias; + + // Set the high bit for the understood 1/2 + significand |= understood_bit_mask; } - // Extract the exponent value: shift the bits down and subtract the bias. - using SignedIntegralType = cuda::std::make_signed_t; - SignedIntegralType const shifted_exponent_bits = exponent_bits >> num_mantissa_bits; - return shifted_exponent_bits - static_cast(exponent_bias); + // To convert the mantissa to an integer, we effectively applied #-mantissa-bits + // powers of 2 to convert the fractional value to an integer, so subtract them off here + int const pow2 = floating_pow2 - num_stored_mantissa_bits; + + return {significand, pow2}; } /** - * @brief Sets the sign bit of a positive floating-point number + * @brief Sets the sign bit of a floating-point number * * @param floating The floating-point value to set the sign of. Must be positive. * @param is_negative The sign bit to set for the floating-point number @@ -192,83 +256,60 @@ struct floating_converter { /** * @brief Adds to the base-2 exponent of a floating-point number * + * @note The caller must guarantee that the input is a positive (> 0) whole number. + * * @param floating The floating value to add to the exponent of. Must be positive. - * @param exp2 The power-of-2 to add to the floating-point number - * @return The input floating-point value * 2^exp2 + * @param pow2 The power-of-2 to add to the floating-point number + * @return The input floating-point value * 2^pow2 */ - CUDF_HOST_DEVICE inline static FloatingType add_exp2(FloatingType floating, int exp2) + CUDF_HOST_DEVICE inline static FloatingType add_pow2(FloatingType floating, int pow2) { + // Note that the input floating-point number is positive (& whole), so we don't have to + // worry about the sign here; the sign will be set later in set_is_negative() + // Convert floating to integer auto integer_rep = bit_cast_to_integer(floating); // Extract the currently stored (biased) exponent + using SignedType = std::make_signed_t; auto exponent_bits = integer_rep & exponent_mask; - auto stored_exp2 = exponent_bits >> num_mantissa_bits; + auto stored_pow2 = static_cast(exponent_bits >> num_stored_mantissa_bits); // Add the additional power-of-2 - stored_exp2 += exp2; + stored_pow2 += pow2; // Check for exponent over/under-flow. - // Note that the input floating-point number is always positive, so we don't have to - // worry about the sign here; the sign will be set later in set_is_negative() - if (stored_exp2 <= 0) { - return 0.0; - } else if (stored_exp2 >= unshifted_exponent_mask) { + if (stored_pow2 <= 0) { + // Denormal (zero handled prior to input) + + // Early out if bit shift will zero it anyway. + // Note: We must handle this explicitly, as too-large a bit-shift is UB + auto const bit_shift = -stored_pow2 + 1; //+1 due to understood bit set below + if (bit_shift > num_stored_mantissa_bits) { return 0.0; } + + // Clear the exponent bits (zero means 2^-126/2^-1022 w/ no understood bit) + integer_rep &= (~exponent_mask); + + // The input floating-point number has an "understood" bit that we need to set + // prior to bit-shifting. Set the understood bit. + integer_rep |= understood_bit_mask; + + // Convert to denormal: bit shift off the low bits + integer_rep >>= bit_shift; + } else if (stored_pow2 >= static_cast(unshifted_exponent_mask)) { + // Overflow: Set infinity return cuda::std::numeric_limits::infinity(); } else { - // Clear existing exponent bits and set new ones - exponent_bits = stored_exp2 << num_mantissa_bits; + // Normal number: Clear existing exponent bits and set new ones + exponent_bits = static_cast(stored_pow2) << num_stored_mantissa_bits; integer_rep &= (~exponent_mask); integer_rep |= exponent_bits; - - // Convert back to float - return bit_cast_to_floating(integer_rep); } - } -}; -/** - * @brief Determine the number of significant bits in an integer - * - * @tparam T Type of input integer value. Must be either uint32_t, uint64_t, or __uint128_t - * @param value The integer whose bits are being counted - * @return The number of significant bits: the # of bits - # of leading zeroes - */ -template || std::is_same_v || - std::is_same_v)> -CUDF_HOST_DEVICE inline int count_significant_bits(T value) -{ -#ifdef __CUDA_ARCH__ - if constexpr (std::is_same_v) { - return 64 - __clzll(static_cast(value)); - } else if constexpr (std::is_same_v) { - return 32 - __clz(static_cast(value)); - } else if constexpr (std::is_same_v) { - // 128 bit type, must break up into high and low components - auto const high_bits = static_cast(value >> 64); - auto const low_bits = static_cast(value); - return 128 - (__clzll(high_bits) + static_cast(high_bits == 0) * __clzll(low_bits)); - } -#else - // Undefined behavior to call __builtin_clzll() with zero in gcc and clang - if (value == 0) { return 0; } - - if constexpr (std::is_same_v) { - return 64 - __builtin_clzll(value); - } else if constexpr (std::is_same_v) { - return 32 - __builtin_clz(value); - } else if constexpr (std::is_same_v) { - // 128 bit type, must break up into high and low components - auto const high_bits = static_cast(value >> 64); - if (high_bits == 0) { - return 64 - __builtin_clzll(static_cast(value)); - } else { - return 128 - __builtin_clzll(high_bits); - } + // Convert back to float + return bit_cast_to_floating(integer_rep); } -#endif -} +}; /** * @brief Recursively calculate a signed large power of 10 (>= 10^19) that can only be stored in an @@ -276,18 +317,18 @@ CUDF_HOST_DEVICE inline int count_significant_bits(T value) * * @note Intended to be run at compile time. * - * @tparam Exp10 The power of 10 to calculate - * @return Returns 10^Exp10 + * @tparam Pow10 The power of 10 to calculate + * @return Returns 10^Pow10 */ -template +template constexpr __uint128_t large_power_of_10() { // Stop at 10^19 to speed up compilation; literals can be used for smaller powers of 10. - static_assert(Exp10 >= 19); - if constexpr (Exp10 == 19) + static_assert(Pow10 >= 19); + if constexpr (Pow10 == 19) return __uint128_t(10000000000000000000ULL); else - return large_power_of_10() * __uint128_t(10); + return large_power_of_10() * __uint128_t(10); } /** @@ -295,11 +336,11 @@ constexpr __uint128_t large_power_of_10() * * @tparam T Type of value to be divided-from. * @param value The number to be divided-from. - * @param exp10 The power-of-10 of the denominator, from 0 to 9 inclusive. - * @return Returns value / 10^exp10 + * @param pow10 The power-of-10 of the denominator, from 0 to 9 inclusive. + * @return Returns value / 10^pow10 */ -template >* = nullptr> -CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int pow10) { // Computing division this way is much faster than the alternatives. // Division is not implemented in GPU hardware, and the compiler will often implement it as a @@ -309,7 +350,7 @@ CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int exp10) // Instead, if the compiler can see exactly what number it is dividing by, it can // produce much more optimal assembly, doing bit shifting, multiplies by a constant, etc. - // For the compiler to see the value though, array lookup (with exp10 as the index) + // For the compiler to see the value though, array lookup (with pow10 as the index) // is not sufficient: We have to use a switch statement. Although this introduces a branch, // it is still much faster than doing the divide any other way. // Perhaps an array can be used in C++23 with the assume attribute? @@ -325,7 +366,7 @@ CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int exp10) // introduces too much pressure on the kernels that use this code, slowing down their benchmarks. // It also dramatically slows down the compile time. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value / 10U; case 2: return value / 100U; @@ -345,14 +386,14 @@ CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int exp10) * * @tparam T Type of value to be divided-from. * @param value The number to be divided-from. - * @param exp10 The power-of-10 of the denominator, from 0 to 19 inclusive. - * @return Returns value / 10^exp10 + * @param pow10 The power-of-10 of the denominator, from 0 to 19 inclusive. + * @return Returns value / 10^pow10 */ -template >* = nullptr> -CUDF_HOST_DEVICE inline T divide_power10_64bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline T divide_power10_64bit(T value, int pow10) { // See comments in divide_power10_32bit() for discussion. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value / 10U; case 2: return value / 100U; @@ -382,14 +423,14 @@ CUDF_HOST_DEVICE inline T divide_power10_64bit(T value, int exp10) * * @tparam T Type of value to be divided-from. * @param value The number to be divided-from. - * @param exp10 The power-of-10 of the denominator, from 0 to 38 inclusive. - * @return Returns value / 10^exp10. + * @param pow10 The power-of-10 of the denominator, from 0 to 38 inclusive. + * @return Returns value / 10^pow10. */ -template >* = nullptr> -CUDF_HOST_DEVICE inline constexpr T divide_power10_128bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T divide_power10_128bit(T value, int pow10) { // See comments in divide_power10_32bit() for an introduction. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value / 10U; case 2: return value / 100U; @@ -438,14 +479,14 @@ CUDF_HOST_DEVICE inline constexpr T divide_power10_128bit(T value, int exp10) * * @tparam T Type of value to be multiplied. * @param value The number to be multiplied. - * @param exp10 The power-of-10 of the multiplier, from 0 to 9 inclusive. - * @return Returns value * 10^exp10 + * @param pow10 The power-of-10 of the multiplier, from 0 to 9 inclusive. + * @return Returns value * 10^pow10 */ -template >* = nullptr> -CUDF_HOST_DEVICE inline constexpr T multiply_power10_32bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T multiply_power10_32bit(T value, int pow10) { // See comments in divide_power10_32bit() for discussion. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value * 10U; case 2: return value * 100U; @@ -465,14 +506,14 @@ CUDF_HOST_DEVICE inline constexpr T multiply_power10_32bit(T value, int exp10) * * @tparam T Type of value to be multiplied. * @param value The number to be multiplied. - * @param exp10 The power-of-10 of the multiplier, from 0 to 19 inclusive. - * @return Returns value * 10^exp10 + * @param pow10 The power-of-10 of the multiplier, from 0 to 19 inclusive. + * @return Returns value * 10^pow10 */ -template >* = nullptr> -CUDF_HOST_DEVICE inline constexpr T multiply_power10_64bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T multiply_power10_64bit(T value, int pow10) { // See comments in divide_power10_32bit() for discussion. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value * 10U; case 2: return value * 100U; @@ -502,14 +543,14 @@ CUDF_HOST_DEVICE inline constexpr T multiply_power10_64bit(T value, int exp10) * * @tparam T Type of value to be multiplied. * @param value The number to be multiplied. - * @param exp10 The power-of-10 of the multiplier, from 0 to 38 inclusive. - * @return Returns value * 10^exp10. + * @param pow10 The power-of-10 of the multiplier, from 0 to 38 inclusive. + * @return Returns value * 10^pow10. */ -template >* = nullptr> -CUDF_HOST_DEVICE inline constexpr T multiply_power10_128bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T multiply_power10_128bit(T value, int pow10) { // See comments in divide_power10_128bit() for discussion. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value * 10U; case 2: return value * 100U; @@ -556,59 +597,678 @@ CUDF_HOST_DEVICE inline constexpr T multiply_power10_128bit(T value, int exp10) /** * @brief Multiply an integer by a power of 10. * - * @note Use this function if you have no a-priori knowledge of what exp10 might be. + * @note Use this function if you have no a-priori knowledge of what pow10 might be. * If you do, prefer calling the bit-size-specific versions * * @tparam Rep Representation type needed for integer exponentiation * @tparam T Integral type of value to be multiplied. * @param value The number to be multiplied. - * @param exp10 The power-of-10 of the multiplier. - * @return Returns value * 10^exp10 + * @param pow10 The power-of-10 of the multiplier. + * @return Returns value * 10^pow10 */ -template )>* = nullptr> -CUDF_HOST_DEVICE inline constexpr T multiply_power10(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T multiply_power10(T value, int pow10) { - // Use this function if you have no knowledge of what exp10 might be + // Use this function if you have no knowledge of what pow10 might be // If you do, prefer calling the bit-size-specific versions if constexpr (sizeof(Rep) <= 4) { - return multiply_power10_32bit(value, exp10); + return multiply_power10_32bit(value, pow10); } else if constexpr (sizeof(Rep) <= 8) { - return multiply_power10_64bit(value, exp10); + return multiply_power10_64bit(value, pow10); } else { - return multiply_power10_128bit(value, exp10); + return multiply_power10_128bit(value, pow10); } } /** * @brief Divide an integer by a power of 10. * - * @note Use this function if you have no a-priori knowledge of what exp10 might be. + * @note Use this function if you have no a-priori knowledge of what pow10 might be. * If you do, prefer calling the bit-size-specific versions * * @tparam Rep Representation type needed for integer exponentiation * @tparam T Integral type of value to be divided-from. * @param value The number to be divided-from. - * @param exp10 The power-of-10 of the denominator. - * @return Returns value / 10^exp10 + * @param pow10 The power-of-10 of the denominator. + * @return Returns value / 10^pow10 */ -template )>* = nullptr> -CUDF_HOST_DEVICE inline constexpr T divide_power10(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T divide_power10(T value, int pow10) { - // Use this function if you have no knowledge of what exp10 might be + // Use this function if you have no knowledge of what pow10 might be // If you do, prefer calling the bit-size-specific versions if constexpr (sizeof(Rep) <= 4) { - return divide_power10_32bit(value, exp10); + return divide_power10_32bit(value, pow10); } else if constexpr (sizeof(Rep) <= 8) { - return divide_power10_64bit(value, exp10); + return divide_power10_64bit(value, pow10); } else { - return divide_power10_128bit(value, exp10); + return divide_power10_128bit(value, pow10); } } +/** + * @brief Perform a bit-shift left, guarding against undefined behavior + * + * @tparam IntegerType Type of input unsigned integer value + * @param value The integer whose bits are being shifted + * @param bit_shift The number of bits to shift left + * @return The bit-shifted integer, except max value if UB would occur + */ +template )> +CUDF_HOST_DEVICE inline IntegerType guarded_left_shift(IntegerType value, int bit_shift) +{ + // Bit shifts larger than this are undefined behavior + constexpr int max_safe_bit_shift = cuda::std::numeric_limits::digits - 1; + return (bit_shift <= max_safe_bit_shift) ? value << bit_shift + : cuda::std::numeric_limits::max(); +} + +/** + * @brief Perform a bit-shift right, guarding against undefined behavior + * + * @tparam IntegerType Type of input unsigned integer value + * @param value The integer whose bits are being shifted + * @param bit_shift The number of bits to shift right + * @return The bit-shifted integer, which is zero on underflow + */ +template )> +CUDF_HOST_DEVICE inline IntegerType guarded_right_shift(IntegerType value, int bit_shift) +{ + // Bit shifts larger than this are undefined behavior + constexpr int max_safe_bit_shift = cuda::std::numeric_limits::digits - 1; + return (bit_shift <= max_safe_bit_shift) ? value >> bit_shift : 0; +} + +/** + * @brief Helper struct with common constants needed by the floating <--> decimal conversions + */ +template +struct shifting_constants { + /// Whether the type is double + static constexpr bool is_double = cuda::std::is_same_v; + + /// Integer type that can hold the value of the significand + using IntegerRep = std::conditional_t; + + /// Num bits needed to hold the significand + static constexpr auto num_significand_bits = cuda::std::numeric_limits::digits; + + /// Shift data back and forth in space of a type with 2x the starting bits, to give us enough room + using ShiftingRep = std::conditional_t; + + // The significand of a float / double is 24 / 53 bits + // However, to uniquely represent each double / float as different #'s in decimal + // you need 17 / 9 digits (from std::numeric_limits::max_digits10) + // To represent 10^17 / 10^9, you need 57 / 30 bits + // So we need to keep track of at least this # of bits during shifting to ensure no info is lost + + // We will be alternately shifting our data back and forth by powers of 2 and 10 to convert + // between floating and decimal (see shifting functions for details). + + // To iteratively shift back and forth, our 2's (bit-) and 10's (divide-/multiply-) shifts must + // be of nearly the same magnitude, or else we'll over-/under-flow our shifting integer + + // 2^10 is approximately 10^3, so the largest shifts will have a 10/3 ratio + // The difference between 2^10 and 10^3 is 1024/1000: 2.4% + // So every time we shift by 10 bits and 3 decimal places, the 2s shift is an extra 2.4% + + // This 2.4% error compounds each time we do an iteration. + // The min (normal) float is 2^-126. + // Min denormal: 2^-126 * 2^-23 (mantissa bits): 2^-149 = ~1.4E-45 + // With our 10/3 shifting ratio, 149 (bit-shifts) * (3 / 10) = 44.7 (10s-shifts) + // 10^(-44.7) = 2E-45, which is off by ~1.4x from 1.4E-45 + + // Similarly, the min (normal) double is 2^-1022. + // Min denormal: 2^-1022 * 2^-52 (mantissa bits): 2^-1074 = 4.94E-324 + // With our 10/3 shifting ratio, 1074 (bit-shifts) * (3 / 10) = 322.2 (10s-shifts) + // 10^(-322.2) = 6.4E-323, which is off by ~13.2x from 4.94E-324 + + // To account for this compounding error, we can either complicate our loop code (slow), + // or use extra bits (in the direction we're shifting the 2s!) to compensate: + // 4 extra bits for doubles (2^4 = 16 > 13.2x error), 1 extra for floats (2 > 1.4x error) + /// # buffer bits to account for shifting error + static constexpr int num_2s_shift_buffer_bits = is_double ? 4 : 1; + + // How much room do we have for shifting? + // Float: 64-bit ShiftingRep - 31 (rep + buffer) = 33 bits. 2^33 = 8.6E9 + // Double: 128-bit ShiftingRep - 61 (rep + buffer) = 67 bits. 2^67 = 1.5E20 + // Thus for double / float we can shift up to 20 / 9 decimal places at once + + // But, we need to stick to our 10-bits / 3-decimals shift ratio to not over/under-flow. + // To simplify our loop code, we'll keep to this ratio by instead shifting a max of + // 18 / 9 decimal places, for double / float (60 / 30 bits) + /// Max at-once decimal place shift + static constexpr int max_digits_shift = is_double ? 18 : 9; + /// Max at-once bit shift + static constexpr int max_bits_shift = max_digits_shift * 10 / 3; + + // Pre-calculate 10^max_digits_shift. Note that 10^18 / 10^9 fits within IntegerRep + /// 10^max_digits_shift + static constexpr auto max_digits_shift_pow = + multiply_power10(IntegerRep(1), max_digits_shift); +}; + +/** + * @brief Add half a bit to integer rep of floating point if conversion causes truncation + * + * @note This fixes problems like 1.2 (value = 1.1999...) at scale -1 -> 11 + * + * @tparam FloatingType Type of integer holding the floating-point significand + * @param floating The floating-point number to convert + * @param integer_rep The integer representation of the floating-point significand + * @param pow2 The power of 2 that needs to be applied to the significand + * @param pow10 The power of 10 that needs to be applied to the significand + * @return integer_rep, shifted 1 and ++'d if the conversion to decimal causes truncation + */ +template )> +CUDF_HOST_DEVICE cuda::std::pair::IntegralType, int> +add_half_if_truncates(FloatingType floating, + typename floating_converter::IntegralType integer_rep, + int pow2, + int pow10) +{ + // The user-supplied scale may truncate information, so we need to talk about rounding. + // We have chosen not to round, so we want 1.23456f with scale -4 to be decimal 12345 + + // But if we don't round at all, 1.2 (double) with scale -1 is 11 instead of 12! + // Why? Because 1.2 (double) is actually stored as 1.1999999... which we truncate to 1.1 + // While correct (given our choice to truncate), this is surprising and undesirable. + // This problem happens because 1.2 is not perfectly representable in floating point, + // and the value 1.199999... happened to be closer to 1.2 than the next value (1.2000...1...) + + // If the scale truncates information (we didn't choose to keep exactly 1.1999...), how + // do we make sure we store 1.2? We'll add half an ulp! (unit in the last place) + // Then 1.1999... becomes 1.2000...1... which truncates to 1.2. + // And if it had been 1.2000...1..., adding half an ulp still truncates to 1.2 + + // Why 1/2 an ulp? Because that's all that is needed. The reason we have this problem in the + // first place is because the compiler rounded (e.g.) 1.2 to the nearest floating point number. + // The distance of this rounding is at most 1/2 ulp, otherwise we'd have rounded the other way. + + // How do we add 1/2 an ulp? Just shift the bits left (updating pow2) and add 1. + // We'll always shift up so every input to the conversion algorithm is aligned the same way. + + // If we add a full ulp we run into issues where we add too much and get the wrong result. + // This is because (e.g.) 2^23 = 8.4E6 which is not quite 7 digits of precision. + // So if we want 7 digits, that may "barely" truncate information; adding a 1 ulp is overkill. + + // So when does the user-supplied scale truncate info? + // For powers > 0: When the 10s (scale) shift is larger than the corresponding bit-shift. + // For powers < 0: When the 10s shift is less than the corresponding bit-shift. + + // Corresponding bit-shift: + // 2^10 is approximately 10^3, but this is off by 1.024% + // 1.024^30 is 2.03704, so this is high by one bit for every 30*3 = 90 powers of 10 + // So 10^N = 2^(10*N/3 - N/90) = 2^(299*N/90) + // Do comparison without dividing, which loses information: + // Note: if shift is "equal," still truncates if pow2 < 0 (shifting UP by 2s, 2^10 > 10^3) + int const pow2_term = 90 * pow2; + int const pow10_term = 299 * pow10; + bool const conversion_truncates = + (pow10_term > pow2_term) || ((pow2_term == pow10_term) && (pow2 < 0)); + + // However, don't add a half-bit if the input is a whole number! + // This is only for errors introduced by rounding decimal fractions! + bool const is_whole_number = (cuda::std::floor(floating) == floating); + bool const add_half_bit = conversion_truncates && !is_whole_number; + + // Add half a bit on truncation (shift to make room and update pow2) + integer_rep <<= 1; + --pow2; + integer_rep += static_cast(add_half_bit); + + return {integer_rep, pow2}; +} + +/** + * @brief Perform base-2 -> base-10 fixed-point conversion for pow10 > 0 + * + * @tparam Rep The type of the storage for the decimal value + * @tparam FloatingType The type of the original floating-point value we are converting from + * @param base2_value The base-2 fixed-point value we are converting from + * @param pow2 The number of powers of 2 to apply to convert from base-2 + * @param pow10 The number of powers of 10 to apply to reach the desired scale factor + * @return Magnitude of the converted-to decimal integer + */ +template )> +CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t shift_to_decimal_pospow( + typename shifting_constants::IntegerRep const base2_value, int pow2, int pow10) +{ + // To convert to decimal, we need to apply the input powers of 2 and 10 + // The result will be (integer) base2_value * (2^pow2) / (10^pow10) + // Output type is ShiftingRep + + // Here pow10 > 0 and pow2 > 0, so we need to shift left by 2s and divide by 10s. + // We'll iterate back and forth between them, shifting up by 2s + // and down by 10s until all of the powers have been applied. + + // However the input base2_value type has virtually no spare room to shift our data + // without over- or under-flowing and losing precision. + // So we'll cast up to ShiftingRep: uint64 for float's, __uint128_t for double's + using Constants = shifting_constants; + using ShiftingRep = typename Constants::ShiftingRep; + auto shifting_rep = static_cast(base2_value); + + // We want to start with our significand bits at the top of the shifting range, + // so that we don't lose information we need on intermediary right-shifts. + // Note that since we're shifting 2s up, we need num_2s_shift_buffer_bits space on the high side, + // For all numbers this bit shift is a fixed distance, due to the understood 2^0 bit. + // Note that shift_from is +1 due to shift in add_half_if_truncates() + static constexpr int shift_up_to = sizeof(ShiftingRep) * 8 - Constants::num_2s_shift_buffer_bits; + static constexpr int shift_from = Constants::num_significand_bits + 1; + static constexpr int max_init_shift = shift_up_to - shift_from; + + // If our total bit shift is less than this, we don't need to iterate + using UnsignedRep = cuda::std::make_unsigned_t; + if (pow2 <= max_init_shift) { + // Shift bits left, divide by 10s to apply the scale factor, and we're done. + shifting_rep = divide_power10(shifting_rep << pow2, pow10); + // NOTE: Cast can overflow! + return static_cast(shifting_rep); + } + + // We need to iterate. Do the combined initial shift + shifting_rep <<= max_init_shift; + pow2 -= max_init_shift; + + // Iterate, dividing by 10s and shifting up by 2s until we're almost done + while (pow10 > Constants::max_digits_shift) { + // More decimal places to shift than we have room: Divide the max number of 10s + shifting_rep /= Constants::max_digits_shift_pow; + pow10 -= Constants::max_digits_shift; + + // If our remaining bit shift is less than the max, we're finished iterating + if (pow2 <= Constants::max_bits_shift) { + // Shift bits left, divide by 10s to apply the scale factor, and we're done. + shifting_rep = divide_power10(shifting_rep << pow2, pow10); + + // NOTE: Cast can overflow! + return static_cast(shifting_rep); + } + + // Shift the max number of bits left again + shifting_rep <<= Constants::max_bits_shift; + pow2 -= Constants::max_bits_shift; + } + + // Last 10s-shift: Divide all remaining decimal places, shift all remaining bits, then bail + // Note: This divide result may not fit in the low half of the bit range + // But the divisor is less than the max-shift, and thus fits within 64 / 32 bits + if constexpr (Constants::is_double) { + shifting_rep = divide_power10_64bit(shifting_rep, pow10); + } else { + shifting_rep = divide_power10_32bit(shifting_rep, pow10); + } + + // Final bit shift: Shift may be large, guard against UB + // NOTE: This can overflow (both cast and shift)! + return guarded_left_shift(static_cast(shifting_rep), pow2); +} + +/** + * @brief Perform base-2 -> base-10 fixed-point conversion for pow10 < 0 + * + * @tparam Rep The type of the storage for the decimal value + * @tparam FloatingType The type of the original floating-point value we are converting from + * @param base2_value The base-2 fixed-point value we are converting from + * @param pow2 The number of powers of 2 to apply to convert from base-2 + * @param pow10 The number of powers of 10 to apply to reach the desired scale factor + * @return Magnitude of the converted-to decimal integer + */ +template )> +CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t shift_to_decimal_negpow( + typename shifting_constants::IntegerRep base2_value, int pow2, int pow10) +{ + // This is similar to shift_to_decimal_pospow(), except pow10 < 0 & pow2 < 0 + // See comments in that function for details. + // Instead here we need to multiply by 10s and shift right by 2s + + // ShiftingRep: uint64 for float's, __uint128_t for double's + using Constants = shifting_constants; + using ShiftingRep = typename Constants::ShiftingRep; + auto shifting_rep = static_cast(base2_value); + + // Convert to using positive values so we don't have keep negating + int pow10_mag = -pow10; + int pow2_mag = -pow2; + + // For performing final 10s-shift + using UnsignedRep = cuda::std::make_unsigned_t; + auto final_shifts_low10s = [&]() { + // Last 10s-shift: multiply all remaining decimal places, shift all remaining bits, then bail + // The multiplier is less than the max-shift, and thus fits within 64 / 32 bits + if constexpr (Constants::is_double) { + shifting_rep = multiply_power10_64bit(shifting_rep, pow10_mag); + } else { + shifting_rep = multiply_power10_32bit(shifting_rep, pow10_mag); + } + + // Final bit shifting: Shift may be large, guard against UB + return static_cast(guarded_right_shift(shifting_rep, pow2_mag)); + }; + + // If our total decimal shift is less than the max, we don't need to iterate + if (pow10_mag <= Constants::max_digits_shift) { return final_shifts_low10s(); } + + // We want to start by lining up our bits to the top of the shifting range, + // except our first operation is a multiply, so not quite that far + // We are bit-shifting down, so we need extra bits on the low-side, which this has. + // Note that shift_from is +1 due to shift in add_half_if_truncates() + static constexpr int shift_up_to = sizeof(ShiftingRep) * 8 - Constants::max_bits_shift; + static constexpr int shift_from = Constants::num_significand_bits + 1; + static constexpr int num_init_bit_shift = shift_up_to - shift_from; + + // Perform initial shift + shifting_rep <<= num_init_bit_shift; + pow2_mag += num_init_bit_shift; + + // Iterate, multiplying by 10s and shifting down by 2s until we're almost done + do { + // More decimal places to shift than we have room: Multiply the max number of 10s + shifting_rep *= Constants::max_digits_shift_pow; + pow10_mag -= Constants::max_digits_shift; + + // If our remaining bit shift is less than the max, we're finished iterating + if (pow2_mag <= Constants::max_bits_shift) { + // Last bit-shift: Shift all remaining bits, apply the remaining scale, then bail + shifting_rep >>= pow2_mag; + + // We need to convert to the output rep for the final scale-factor multiply, because if (e.g.) + // float -> dec128 and some large pow10_mag, it might overflow the 64bit shifting rep. + // It's not needed for pow10 > 0 because we're dividing by 10s there instead of multiplying. + // NOTE: This can overflow! (Both multiply and cast) + return multiply_power10(static_cast(shifting_rep), pow10_mag); + } + + // More bits to shift than we have room: Shift the max number of 2s + shifting_rep >>= Constants::max_bits_shift; + pow2_mag -= Constants::max_bits_shift; + } while (pow10_mag > Constants::max_digits_shift); + + // Do our final shifts + return final_shifts_low10s(); +} + +/** + * @brief Perform base-2 -> base-10 fixed-point conversion + * + * @tparam Rep The type of integer we are converting to, to store the decimal value + * @tparam FloatingType The type of floating-point object we are converting from + * @param base2_value The base-2 fixed-point value we are converting from + * @param pow2 The number of powers of 2 to apply to convert from base-2 + * @param pow10 The number of powers of 10 to apply to reach the desired scale factor + * @return Integer representation of the floating-point value, given the desired scale + */ +template )> +CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t convert_floating_to_integral_shifting( + typename floating_converter::IntegralType base2_value, int pow10, int pow2) +{ + // Apply the powers of 2 and 10 to convert to decimal. + // The result will be base2_value * (2^pow2) / (10^pow10) + + // Note that while this code is branchy, the decimal scale factor is part of the + // column type itself, so every thread will take the same branches on pow10. + // Also data within a column tends to be similar, so they will often take the + // same branches on pow2 as well. + + // NOTE: some returns here can overflow (e.g. ShiftingRep -> UnsignedRep) + using UnsignedRep = cuda::std::make_unsigned_t; + if (pow10 == 0) { + // NOTE: Left Bit-shift can overflow! As can cast! (e.g. double -> decimal32) + // Bit shifts may be large, guard against UB + if (pow2 >= 0) { + return guarded_left_shift(static_cast(base2_value), pow2); + } else { + return static_cast(guarded_right_shift(base2_value, -pow2)); + } + } else if (pow10 > 0) { + if (pow2 <= 0) { + // Power-2/10 shifts both downward: order doesn't matter, apply and bail. + // Guard against shift being undefined behavior + auto const shifted = guarded_right_shift(base2_value, -pow2); + return static_cast(divide_power10(shifted, pow10)); + } + return shift_to_decimal_pospow(base2_value, pow2, pow10); + } else { // pow10 < 0 + if (pow2 >= 0) { + // Power-2/10 shifts both upward: order doesn't matter, apply and bail. + // NOTE: Either shift, multiply, or cast (e.g. double -> decimal32) can overflow! + auto const shifted = guarded_left_shift(static_cast(base2_value), pow2); + return multiply_power10(shifted, -pow10); + } + return shift_to_decimal_negpow(base2_value, pow2, pow10); + } +} + +/** + * @brief Perform floating-point -> integer decimal conversion + * + * @tparam Rep The type of integer we are converting to, to store the decimal value + * @tparam FloatingType The type of floating-point object we are converting from + * @param floating The floating point value to convert + * @param scale The desired base-10 scale factor: decimal value = returned value * 10^scale + * @return Integer representation of the floating-point value, given the desired scale + */ +template )> +CUDF_HOST_DEVICE inline Rep convert_floating_to_integral(FloatingType const& floating, + scale_type const& scale) +{ + // Extract components of the floating point number + using converter = floating_converter; + auto const integer_rep = converter::bit_cast_to_integer(floating); + if (converter::is_zero(integer_rep)) { return 0; } + + // Note that the significand here is an unsigned integer with sizeof(FloatingType) + auto const is_negative = converter::get_is_negative(integer_rep); + auto const [significand, floating_pow2] = converter::get_significand_and_pow2(integer_rep); + + // Add half a bit if truncating to yield expected value, see function for discussion. + auto const pow10 = static_cast(scale); + auto const [base2_value, pow2] = + add_half_if_truncates(floating, significand, floating_pow2, pow10); + + // Apply the powers of 2 and 10 to convert to decimal. + auto const magnitude = + convert_floating_to_integral_shifting(base2_value, pow10, pow2); + + // Reapply the sign and return + // NOTE: Cast can overflow! + auto const signed_magnitude = static_cast(magnitude); + return is_negative ? -signed_magnitude : signed_magnitude; +} + +/** + * @brief Perform base-10 -> base-2 fixed-point conversion for pow10 > 0 + * + * @tparam DecimalRep The decimal integer type we are converting from + * @tparam FloatingType The type of floating point object we are converting to + * @param decimal_rep The decimal integer to convert + * @param pow10 The number of powers of 10 to apply to undo the scale factor + * @return A pair of the base-2 value and the remaining powers of 2 to be applied + */ +template )> +CUDF_HOST_DEVICE inline auto shift_to_binary_pospow(DecimalRep decimal_rep, int pow10) +{ + // This is the reverse of shift_to_decimal_pospow(), see that for more details. + + // ShiftingRep: uint64 for float's, __uint128_t for double's + using Constants = shifting_constants; + using ShiftingRep = typename Constants::ShiftingRep; + + // We want to start by lining up our bits to the top of the shifting range, + // except our first operation is a multiply, so not quite that far + // We are bit-shifting down, so we need extra bits on the low-side, which this has. + static constexpr int shift_up_to = sizeof(ShiftingRep) * 8 - Constants::max_bits_shift; + int const shift_from = count_significant_bits(decimal_rep); + int const num_init_bit_shift = shift_up_to - shift_from; + int pow2 = -num_init_bit_shift; + + // Perform the initial bit shift + ShiftingRep shifting_rep; + if constexpr (sizeof(ShiftingRep) < sizeof(DecimalRep)) { + // Shift within DecimalRep before dropping to the smaller ShiftingRep + decimal_rep = (pow2 >= 0) ? (decimal_rep >> pow2) : (decimal_rep << -pow2); + shifting_rep = static_cast(decimal_rep); + } else { + // Scale up to ShiftingRep before shifting + shifting_rep = static_cast(decimal_rep); + shifting_rep = (pow2 >= 0) ? (shifting_rep >> pow2) : (shifting_rep << -pow2); + } + + // Iterate, multiplying by 10s and shifting down by 2s until we're almost done + while (pow10 > Constants::max_digits_shift) { + // More decimal places to shift than we have room: Multiply the max number of 10s + shifting_rep *= Constants::max_digits_shift_pow; + pow10 -= Constants::max_digits_shift; + + // Then make more room by bit shifting down by the max # of 2s + shifting_rep >>= Constants::max_bits_shift; + pow2 += Constants::max_bits_shift; + } + + // Last 10s-shift: multiply all remaining decimal places + // The multiplier is less than the max-shift, and thus fits within 64 / 32 bits + if constexpr (Constants::is_double) { + shifting_rep = multiply_power10_64bit(shifting_rep, pow10); + } else { + shifting_rep = multiply_power10_32bit(shifting_rep, pow10); + } + + // Our shifting_rep is now the integer mantissa, return it and the powers of 2 + return std::pair{shifting_rep, pow2}; +} + +/** + * @brief Perform base-10 -> base-2 fixed-point conversion for pow10 < 0 + * + * @tparam DecimalRep The decimal integer type we are converting from + * @tparam FloatingType The type of floating point object we are converting to + * @param decimal_rep The decimal integer to convert + * @param pow10 The number of powers of 10 to apply to undo the scale factor + * @return A pair of the base-2 value and the remaining powers of 2 to be applied + */ +template )> +CUDF_HOST_DEVICE inline auto shift_to_binary_negpow(DecimalRep decimal_rep, int const pow10) +{ + // This is the reverse of shift_to_decimal_negpow(), see that for more details. + + // ShiftingRep: uint64 for float's, __uint128_t for double's + using Constants = shifting_constants; + using ShiftingRep = typename Constants::ShiftingRep; + + // We want to start with our significand bits at the top of the shifting range, + // so that we lose minimal information we need on intermediary right-shifts. + // Note that since we're shifting 2s up, we need num_2s_shift_buffer_bits space on the high side + static constexpr int shift_up_to = sizeof(ShiftingRep) * 8 - Constants::num_2s_shift_buffer_bits; + int const shift_from = count_significant_bits(decimal_rep); + int const num_init_bit_shift = shift_up_to - shift_from; + int pow2 = -num_init_bit_shift; + + // Perform the initial bit shift + ShiftingRep shifting_rep; + if constexpr (sizeof(ShiftingRep) < sizeof(DecimalRep)) { + // Shift within DecimalRep before dropping to the smaller ShiftingRep + decimal_rep = (pow2 >= 0) ? (decimal_rep >> pow2) : (decimal_rep << -pow2); + shifting_rep = static_cast(decimal_rep); + } else { + // Scale up to ShiftingRep before shifting + shifting_rep = static_cast(decimal_rep); + shifting_rep = (pow2 >= 0) ? (shifting_rep >> pow2) : (shifting_rep << -pow2); + } + + // Convert to using positive values upfront, simpler than doing later. + int pow10_mag = -pow10; + + // Iterate, dividing by 10s and shifting up by 2s until we're almost done + while (pow10_mag > Constants::max_digits_shift) { + // More decimal places to shift than we have room: Divide the max number of 10s + shifting_rep /= Constants::max_digits_shift_pow; + pow10_mag -= Constants::max_digits_shift; + + // Then make more room by bit shifting up by the max # of 2s + shifting_rep <<= Constants::max_bits_shift; + pow2 -= Constants::max_bits_shift; + } + + // Last 10s-shift: Divdie all remaining decimal places. + // This divide result may not fit in the low half of the bit range + // But the divisor is less than the max-shift, and thus fits within 64 / 32 bits + if constexpr (Constants::is_double) { + shifting_rep = divide_power10_64bit(shifting_rep, pow10_mag); + } else { + shifting_rep = divide_power10_32bit(shifting_rep, pow10_mag); + } + + // Our shifting_rep is now the integer mantissa, return it and the powers of 2 + return std::pair{shifting_rep, pow2}; +} + +/** + * @brief Perform integer decimal -> floating-point conversion + * + * @tparam FloatingType The type of floating-point object we are converting to + * @tparam Rep The decimal integer type we are converting from + * @param value The decimal integer to convert + * @param scale The base-10 scale factor for the input integer + * @return Floating-point representation of the scaled integral value + */ +template )> +CUDF_HOST_DEVICE inline FloatingType convert_integral_to_floating(Rep const& value, + scale_type const& scale) +{ + // Check the sign of the input + bool const is_negative = (value < 0); + + // Convert to unsigned for bit counting/shifting + using UnsignedType = cuda::std::make_unsigned_t; + auto const unsigned_value = [&]() -> UnsignedType { + // Must guard against minimum value, as we can't just negate it: not representable. + if (value == cuda::std::numeric_limits::min()) { return static_cast(value); } + + // No abs function for 128bit types, so have to do it manually. + if constexpr (cuda::std::is_same_v) { + return static_cast(is_negative ? -value : value); + } else { + return cuda::std::abs(value); + } + }(); + + // Shift by powers of 2 and 10 to get our integer mantissa + auto const [mantissa, pow2] = [&]() { + auto const pow10 = static_cast(scale); + if (pow10 >= 0) { + return shift_to_binary_pospow(unsigned_value, pow10); + } else { // pow10 < 0 + return shift_to_binary_negpow(unsigned_value, pow10); + } + }(); + + // Zero has special exponent bits, just handle it here + if (mantissa == 0) { return FloatingType(0.0f); } + + // Cast our integer mantissa to floating point + auto const floating = static_cast(mantissa); // IEEE-754 rounds to even + + // Apply the sign and the remaining powers of 2 + using converter = floating_converter; + auto const magnitude = converter::add_pow2(floating, pow2); + return converter::set_is_negative(magnitude, is_negative); +} + } // namespace detail /** @} */ // end of group diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp index 74c8bc67d3a..8a515335351 100644 --- a/cpp/include/cudf/unary.hpp +++ b/cpp/include/cudf/unary.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -50,14 +51,19 @@ namespace cudf { */ template () && - cuda::std::is_floating_point_v>* = nullptr> + CUDF_ENABLE_IF(cuda::std::is_floating_point_v&& is_fixed_point())> CUDF_HOST_DEVICE Fixed convert_floating_to_fixed(Floating floating, numeric::scale_type scale) { - using Rep = typename Fixed::rep; - auto const shifted = numeric::detail::shift(floating, scale); - numeric::scaled_integer scaled{static_cast(shifted), scale}; - return Fixed(scaled); + using Rep = typename Fixed::rep; + auto const value = [&]() { + if constexpr (Fixed::rad == numeric::Radix::BASE_10) { + return numeric::detail::convert_floating_to_integral(floating, scale); + } else { + return static_cast(numeric::detail::shift(floating, scale)); + } + }(); + + return Fixed(numeric::scaled_integer{value, scale}); } /** @@ -75,14 +81,17 @@ CUDF_HOST_DEVICE Fixed convert_floating_to_fixed(Floating floating, numeric::sca */ template && - is_fixed_point()>* = nullptr> + CUDF_ENABLE_IF(cuda::std::is_floating_point_v&& is_fixed_point())> CUDF_HOST_DEVICE Floating convert_fixed_to_floating(Fixed fixed) { - using Rep = typename Fixed::rep; - auto const casted = static_cast(fixed.value()); - auto const scale = numeric::scale_type{-fixed.scale()}; - return numeric::detail::shift(casted, scale); + using Rep = typename Fixed::rep; + if constexpr (Fixed::rad == numeric::Radix::BASE_10) { + return numeric::detail::convert_integral_to_floating(fixed.value(), fixed.scale()); + } else { + auto const casted = static_cast(fixed.value()); + auto const scale = numeric::scale_type{-fixed.scale()}; + return numeric::detail::shift(casted, scale); + } } /** @@ -95,7 +104,7 @@ CUDF_HOST_DEVICE Floating convert_fixed_to_floating(Fixed fixed) */ template >* = nullptr> + CUDF_ENABLE_IF(cuda::std::is_floating_point_v)> CUDF_HOST_DEVICE Floating convert_to_floating(Input input) { if constexpr (is_fixed_point()) { diff --git a/cpp/tests/fixed_point/fixed_point_tests.cpp b/cpp/tests/fixed_point/fixed_point_tests.cpp index ab7984d4b03..a222289216d 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cpp +++ b/cpp/tests/fixed_point/fixed_point_tests.cpp @@ -38,7 +38,7 @@ struct FixedPointTest : public cudf::test::BaseFixture {}; template struct FixedPointTestAllReps : public cudf::test::BaseFixture {}; -using RepresentationTypes = ::testing::Types; +using RepresentationTypes = ::testing::Types; TYPED_TEST_SUITE(FixedPointTestAllReps, RepresentationTypes); @@ -53,6 +53,7 @@ TYPED_TEST(FixedPointTestAllReps, SimpleDecimalXXConstruction) auto num4 = cudf::convert_floating_to_fixed(1.234567, scale_type(-4)); auto num5 = cudf::convert_floating_to_fixed(1.234567, scale_type(-5)); auto num6 = cudf::convert_floating_to_fixed(1.234567, scale_type(-6)); + auto num7 = cudf::convert_floating_to_fixed(0.0, scale_type(-4)); EXPECT_EQ(1, cudf::convert_fixed_to_floating(num0)); EXPECT_EQ(1.2, cudf::convert_fixed_to_floating(num1)); @@ -61,6 +62,7 @@ TYPED_TEST(FixedPointTestAllReps, SimpleDecimalXXConstruction) EXPECT_EQ(1.2345, cudf::convert_fixed_to_floating(num4)); EXPECT_EQ(1.23456, cudf::convert_fixed_to_floating(num5)); EXPECT_EQ(1.234567, cudf::convert_fixed_to_floating(num6)); + EXPECT_EQ(0.0, cudf::convert_fixed_to_floating(num7)); } TYPED_TEST(FixedPointTestAllReps, SimpleNegativeDecimalXXConstruction) @@ -74,6 +76,7 @@ TYPED_TEST(FixedPointTestAllReps, SimpleNegativeDecimalXXConstruction) auto num4 = cudf::convert_floating_to_fixed(-1.234567, scale_type(-4)); auto num5 = cudf::convert_floating_to_fixed(-1.234567, scale_type(-5)); auto num6 = cudf::convert_floating_to_fixed(-1.234567, scale_type(-6)); + auto num7 = cudf::convert_floating_to_fixed(-0.0, scale_type(-4)); EXPECT_EQ(-1, cudf::convert_fixed_to_floating(num0)); EXPECT_EQ(-1.2, cudf::convert_fixed_to_floating(num1)); @@ -82,6 +85,7 @@ TYPED_TEST(FixedPointTestAllReps, SimpleNegativeDecimalXXConstruction) EXPECT_EQ(-1.2345, cudf::convert_fixed_to_floating(num4)); EXPECT_EQ(-1.23456, cudf::convert_fixed_to_floating(num5)); EXPECT_EQ(-1.234567, cudf::convert_fixed_to_floating(num6)); + EXPECT_EQ(-0.0, cudf::convert_fixed_to_floating(num7)); } TYPED_TEST(FixedPointTestAllReps, PaddedDecimalXXConstruction) @@ -99,14 +103,10 @@ TYPED_TEST(FixedPointTestAllReps, PaddedDecimalXXConstruction) EXPECT_EQ(1.1, cudf::convert_fixed_to_floating(a)); EXPECT_EQ(1.01, cudf::convert_fixed_to_floating(b)); - EXPECT_EQ(1, - cudf::convert_fixed_to_floating( - c)); // intentional (inherited problem from floating point) + EXPECT_EQ(1.001, cudf::convert_fixed_to_floating(c)); EXPECT_EQ(1.0001, cudf::convert_fixed_to_floating(d)); EXPECT_EQ(1.00001, cudf::convert_fixed_to_floating(e)); - EXPECT_EQ(1, - cudf::convert_fixed_to_floating( - f)); // intentional (inherited problem from floating point) + EXPECT_EQ(1.000001, cudf::convert_fixed_to_floating(f)); EXPECT_TRUE(1.000123 - cudf::convert_fixed_to_floating(x) < std::numeric_limits::epsilon()); @@ -153,6 +153,119 @@ TYPED_TEST(FixedPointTestAllReps, MoreSimpleBinaryFPConstruction) EXPECT_EQ(2.0625, cudf::convert_fixed_to_floating(num1)); } +TEST_F(FixedPointTest, PreciseFloatDecimal64Construction) +{ + // Need 9 decimal digits to uniquely represent all floats (numeric_limits::max_digits10()). + // Precise conversion: set the scale factor to 9 less than the order-of-magnitude. + // But with -9 scale factor decimal32 can overflow: use decimal64 instead. + + // Positive Exponent + { + auto num0 = cudf::convert_floating_to_fixed(3.141593E7f, scale_type(-2)); + auto num1 = cudf::convert_floating_to_fixed(3.141593E12f, scale_type(3)); + auto num2 = cudf::convert_floating_to_fixed(3.141593E17f, scale_type(8)); + auto num3 = cudf::convert_floating_to_fixed(3.141593E22f, scale_type(13)); + auto num4 = cudf::convert_floating_to_fixed(3.141593E27f, scale_type(18)); + auto num5 = cudf::convert_floating_to_fixed(3.141593E32f, scale_type(23)); + auto num6 = cudf::convert_floating_to_fixed(3.141593E37f, scale_type(28)); + + EXPECT_EQ(3.141593E7f, cudf::convert_fixed_to_floating(num0)); + EXPECT_EQ(3.141593E12f, cudf::convert_fixed_to_floating(num1)); + EXPECT_EQ(3.141593E17f, cudf::convert_fixed_to_floating(num2)); + EXPECT_EQ(3.141593E22f, cudf::convert_fixed_to_floating(num3)); + EXPECT_EQ(3.141593E27f, cudf::convert_fixed_to_floating(num4)); + EXPECT_EQ(3.141593E32f, cudf::convert_fixed_to_floating(num5)); + EXPECT_EQ(3.141593E37f, cudf::convert_fixed_to_floating(num6)); + } + + // Negative Exponent + { + auto num0 = cudf::convert_floating_to_fixed(3.141593E-7f, scale_type(-16)); + auto num1 = cudf::convert_floating_to_fixed(3.141593E-12f, scale_type(-21)); + auto num2 = cudf::convert_floating_to_fixed(3.141593E-17f, scale_type(-26)); + auto num3 = cudf::convert_floating_to_fixed(3.141593E-22f, scale_type(-31)); + auto num4 = cudf::convert_floating_to_fixed(3.141593E-27f, scale_type(-36)); + auto num5 = cudf::convert_floating_to_fixed(3.141593E-32f, scale_type(-41)); + auto num6 = cudf::convert_floating_to_fixed(3.141593E-37f, scale_type(-47)); + + EXPECT_EQ(3.141593E-7f, cudf::convert_fixed_to_floating(num0)); + EXPECT_EQ(3.141593E-12f, cudf::convert_fixed_to_floating(num1)); + EXPECT_EQ(3.141593E-17f, cudf::convert_fixed_to_floating(num2)); + EXPECT_EQ(3.141593E-22f, cudf::convert_fixed_to_floating(num3)); + EXPECT_EQ(3.141593E-27f, cudf::convert_fixed_to_floating(num4)); + EXPECT_EQ(3.141593E-32f, cudf::convert_fixed_to_floating(num5)); + EXPECT_EQ(3.141593E-37f, cudf::convert_fixed_to_floating(num6)); + + // Denormals + auto num7 = cudf::convert_floating_to_fixed(3.141593E-39f, scale_type(-48)); + auto num8 = cudf::convert_floating_to_fixed(3.141593E-41f, scale_type(-50)); + auto num9 = cudf::convert_floating_to_fixed(3.141593E-43f, scale_type(-52)); + auto num10 = cudf::convert_floating_to_fixed(FLT_TRUE_MIN, scale_type(-54)); + + EXPECT_EQ(3.141593E-39f, cudf::convert_fixed_to_floating(num7)); + EXPECT_EQ(3.141593E-41f, cudf::convert_fixed_to_floating(num8)); + EXPECT_EQ(3.141593E-43f, cudf::convert_fixed_to_floating(num9)); + EXPECT_EQ(FLT_TRUE_MIN, cudf::convert_fixed_to_floating(num10)); + } +} + +TEST_F(FixedPointTest, PreciseDoubleDecimal64Construction) +{ + // Need 17 decimal digits to uniquely represent all doubles (numeric_limits::max_digits10()). + // Precise conversion: set the scale factor to 17 less than the order-of-magnitude. + + using decimal64 = fixed_point; + + // Positive Exponent + { + auto num0 = cudf::convert_floating_to_fixed(3.141593E8, scale_type(-9)); + auto num1 = cudf::convert_floating_to_fixed(3.141593E58, scale_type(41)); + auto num2 = cudf::convert_floating_to_fixed(3.141593E108, scale_type(91)); + auto num3 = cudf::convert_floating_to_fixed(3.141593E158, scale_type(141)); + auto num4 = cudf::convert_floating_to_fixed(3.141593E208, scale_type(191)); + auto num5 = cudf::convert_floating_to_fixed(3.141593E258, scale_type(241)); + auto num6 = cudf::convert_floating_to_fixed(3.141593E307, scale_type(290)); + + EXPECT_EQ(3.141593E8, cudf::convert_fixed_to_floating(num0)); + EXPECT_EQ(3.141593E58, cudf::convert_fixed_to_floating(num1)); + EXPECT_EQ(3.141593E108, cudf::convert_fixed_to_floating(num2)); + EXPECT_EQ(3.141593E158, cudf::convert_fixed_to_floating(num3)); + EXPECT_EQ(3.141593E208, cudf::convert_fixed_to_floating(num4)); + EXPECT_EQ(3.141593E258, cudf::convert_fixed_to_floating(num5)); + EXPECT_EQ(3.141593E307, cudf::convert_fixed_to_floating(num6)); + } + + // Negative Exponent + { + auto num0 = cudf::convert_floating_to_fixed(3.141593E-8, scale_type(-25)); + auto num1 = cudf::convert_floating_to_fixed(3.141593E-58, scale_type(-75)); + auto num2 = cudf::convert_floating_to_fixed(3.141593E-108, scale_type(-125)); + auto num3 = cudf::convert_floating_to_fixed(3.141593E-158, scale_type(-175)); + auto num4 = cudf::convert_floating_to_fixed(3.141593E-208, scale_type(-225)); + auto num5 = cudf::convert_floating_to_fixed(3.141593E-258, scale_type(-275)); + auto num6 = cudf::convert_floating_to_fixed(3.141593E-308, scale_type(-325)); + + EXPECT_EQ(3.141593E-8, cudf::convert_fixed_to_floating(num0)); + EXPECT_EQ(3.141593E-58, cudf::convert_fixed_to_floating(num1)); + EXPECT_EQ(3.141593E-108, cudf::convert_fixed_to_floating(num2)); + EXPECT_EQ(3.141593E-158, cudf::convert_fixed_to_floating(num3)); + EXPECT_EQ(3.141593E-208, cudf::convert_fixed_to_floating(num4)); + EXPECT_EQ(3.141593E-258, cudf::convert_fixed_to_floating(num5)); + EXPECT_EQ(3.141593E-308, cudf::convert_fixed_to_floating(num6)); + + // Denormals + auto num7 = cudf::convert_floating_to_fixed(3.141593E-309, scale_type(-326)); + auto num8 = cudf::convert_floating_to_fixed(3.141593E-314, scale_type(-331)); + auto num9 = cudf::convert_floating_to_fixed(3.141593E-319, scale_type(-336)); + auto num10 = cudf::convert_floating_to_fixed(DBL_TRUE_MIN, scale_type(-341)); + + EXPECT_EQ(3.141593E-309, cudf::convert_fixed_to_floating(num7)); + EXPECT_EQ(3.141593E-314, cudf::convert_fixed_to_floating(num8)); + EXPECT_EQ(3.141593E-319, cudf::convert_fixed_to_floating(num9)); + EXPECT_EQ(DBL_TRUE_MIN, cudf::convert_fixed_to_floating(num10)); + } +} + TYPED_TEST(FixedPointTestAllReps, SimpleDecimalXXMath) { using decimalXX = fixed_point; @@ -442,8 +555,6 @@ void float_vector_test(ValueType const initial_value, int32_t const scale, Binop binop) { - using decimal32 = fixed_point; - std::vector vec1(size); std::vector vec2(size); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 1d6a3b3304a..7136b162c13 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -3509,9 +3509,9 @@ void testCastFloatToDecimal() { @Test void testCastDoubleToDecimal() { testCastNumericToDecimalsAndBack(DType.FLOAT64, false, 0, - () -> ColumnVector.fromBoxedDoubles(1.0, 2.1, -3.23, null, 2.41281, (double) Long.MAX_VALUE), - () -> ColumnVector.fromBoxedDoubles(1.0, 2.0, -3.0, null, 2.0, (double) Long.MAX_VALUE), - new Long[]{1L, 2L, -3L, null, 2L, Long.MAX_VALUE} + () -> ColumnVector.fromBoxedDoubles(1.0, 2.1, -3.23, null, 2.41281, (double) Integer.MAX_VALUE), + () -> ColumnVector.fromBoxedDoubles(1.0, 2.0, -3.0, null, 2.0, (double) Integer.MAX_VALUE), + new Long[]{1L, 2L, -3L, null, 2L, (long) Integer.MAX_VALUE} ); testCastNumericToDecimalsAndBack(DType.FLOAT64, false, -2, () -> ColumnVector.fromBoxedDoubles(1.0, 2.1, -3.23, null, 2.41281, -55.01999), diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index c41a938f6ea..65f739bc74a 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -97,7 +97,7 @@ def test_typecast_from_float_to_decimal(request, data, from_dtype, to_dtype): pytest.mark.xfail( condition=version.parse(pa.__version__) >= version.parse("13.0.0") and from_dtype == np.dtype("float32") - and to_dtype.precision > 7, + and to_dtype.precision > 12, reason="https://github.com/rapidsai/cudf/issues/14169", ) )