mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-28 10:31:57 +00:00
commit
af4844b284
@ -1,32 +1,3 @@
|
|||||||
// Based on https://github.com/amdn/itoa and combined with our optimizations
|
|
||||||
//
|
|
||||||
//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-//
|
|
||||||
//
|
|
||||||
// The MIT License (MIT)
|
|
||||||
// Copyright (c) 2016 Arturo Martin-de-Nicolas
|
|
||||||
//
|
|
||||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
// of this software and associated documentation files (the "Software"), to deal
|
|
||||||
// in the Software without restriction, including without limitation the rights
|
|
||||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
// copies of the Software, and to permit persons to whom the Software is
|
|
||||||
// furnished to do so, subject to the following conditions:
|
|
||||||
//
|
|
||||||
// The above copyright notice and this permission notice shall be included
|
|
||||||
// in all copies or substantial portions of the Software.
|
|
||||||
//
|
|
||||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
// SOFTWARE.
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include <cstddef>
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstring>
|
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <base/defines.h>
|
#include <base/defines.h>
|
||||||
#include <base/extended_types.h>
|
#include <base/extended_types.h>
|
||||||
@ -34,99 +5,15 @@
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
template <typename T>
|
ALWAYS_INLINE inline char * outOneDigit(char * p, uint8_t value)
|
||||||
ALWAYS_INLINE inline constexpr T pow10(size_t x)
|
|
||||||
{
|
|
||||||
return x ? 10 * pow10<T>(x - 1) : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Division by a power of 10 is implemented using a multiplicative inverse.
|
|
||||||
// This strength reduction is also done by optimizing compilers, but
|
|
||||||
// presently the fastest results are produced by using the values
|
|
||||||
// for the multiplication and the shift as given by the algorithm
|
|
||||||
// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
|
|
||||||
//
|
|
||||||
// http://www.agner.org/optimize/optimizing_assembly.pdf
|
|
||||||
//
|
|
||||||
// "Integer division by a constant (all processors)
|
|
||||||
// A floating point number can be divided by a constant by multiplying
|
|
||||||
// with the reciprocal. If we want to do the same with integers, we have
|
|
||||||
// to scale the reciprocal by 2n and then shift the product to the right
|
|
||||||
// by n. There are various algorithms for finding a suitable value of n
|
|
||||||
// and compensating for rounding errors. The algorithm described below
|
|
||||||
// was invented by Terje Mathisen, Norway, and not published elsewhere."
|
|
||||||
|
|
||||||
/// Division by constant is performed by:
|
|
||||||
/// 1. Adding 1 if needed;
|
|
||||||
/// 2. Multiplying by another constant;
|
|
||||||
/// 3. Shifting right by another constant.
|
|
||||||
template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
|
|
||||||
struct Division
|
|
||||||
{
|
|
||||||
static constexpr bool add{add_};
|
|
||||||
static constexpr UInt multiplier{multiplier_};
|
|
||||||
static constexpr unsigned shift{shift_};
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Select a type with appropriate number of bytes from the list of types.
|
|
||||||
/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
|
|
||||||
/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
|
|
||||||
template <size_t N, typename T, typename... Ts>
|
|
||||||
struct SelectType
|
|
||||||
{
|
|
||||||
using Result = typename SelectType<N / 2, Ts...>::Result;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T, typename... Ts>
|
|
||||||
struct SelectType<1, T, Ts...>
|
|
||||||
{
|
|
||||||
using Result = T;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/// Division by 10^N where N is the size of the type.
|
|
||||||
template <size_t N>
|
|
||||||
using DivisionBy10PowN = typename SelectType<
|
|
||||||
N,
|
|
||||||
Division<uint8_t, false, 205U, 11>, /// divide by 10
|
|
||||||
Division<uint16_t, true, 41943U, 22>, /// divide by 100
|
|
||||||
Division<uint32_t, false, 3518437209U, 45>, /// divide by 10000
|
|
||||||
Division<uint64_t, false, 12379400392853802749ULL, 90> /// divide by 100000000
|
|
||||||
>::Result;
|
|
||||||
|
|
||||||
template <size_t N>
|
|
||||||
using UnsignedOfSize = typename SelectType<N, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>::Result;
|
|
||||||
|
|
||||||
/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
|
|
||||||
template <size_t N>
|
|
||||||
struct QuotientAndRemainder
|
|
||||||
{
|
|
||||||
UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
|
|
||||||
UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
|
|
||||||
};
|
|
||||||
|
|
||||||
template <size_t N>
|
|
||||||
QuotientAndRemainder<N> inline split(UnsignedOfSize<N> value)
|
|
||||||
{
|
|
||||||
constexpr DivisionBy10PowN<N> division;
|
|
||||||
|
|
||||||
UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
|
|
||||||
UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
|
|
||||||
|
|
||||||
return {quotient, remainder};
|
|
||||||
}
|
|
||||||
|
|
||||||
ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value)
|
|
||||||
{
|
{
|
||||||
*p = '0' + value;
|
*p = '0' + value;
|
||||||
++p;
|
return p + 1;
|
||||||
return p;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Using a lookup table to convert binary numbers from 0 to 99
|
// Using a lookup table to convert binary numbers from 0 to 99
|
||||||
// into ascii characters as described by Andrei Alexandrescu in
|
// into ascii characters as described by Andrei Alexandrescu in
|
||||||
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
|
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
|
||||||
|
|
||||||
const char digits[201] = "00010203040506070809"
|
const char digits[201] = "00010203040506070809"
|
||||||
"10111213141516171819"
|
"10111213141516171819"
|
||||||
"20212223242526272829"
|
"20212223242526272829"
|
||||||
@ -137,7 +24,6 @@ const char digits[201] = "00010203040506070809"
|
|||||||
"70717273747576777879"
|
"70717273747576777879"
|
||||||
"80818283848586878889"
|
"80818283848586878889"
|
||||||
"90919293949596979899";
|
"90919293949596979899";
|
||||||
|
|
||||||
ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
|
ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
|
||||||
{
|
{
|
||||||
memcpy(p, &digits[value * 2], 2);
|
memcpy(p, &digits[value * 2], 2);
|
||||||
@ -145,153 +31,260 @@ ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
|
|||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace convert
|
namespace jeaiii
|
||||||
{
|
{
|
||||||
template <typename UInt, size_t N = sizeof(UInt)>
|
/*
|
||||||
char * head(char * p, UInt u);
|
MIT License
|
||||||
template <typename UInt, size_t N = sizeof(UInt)>
|
|
||||||
char * tail(char * p, UInt u);
|
|
||||||
|
|
||||||
//===----------------------------------------------------------===//
|
Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa
|
||||||
// head: find most significant digit, skip leading zeros
|
|
||||||
//===----------------------------------------------------------===//
|
|
||||||
|
|
||||||
// "x" contains quotient and remainder after division by 10^N
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
// quotient is less than 10^N
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
template <size_t N>
|
in the Software without restriction, including without limitation the rights
|
||||||
ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder<N> x)
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
struct pair
|
||||||
{
|
{
|
||||||
p = head(p, UnsignedOfSize<N / 2>(x.quotient));
|
char dd[2];
|
||||||
p = tail(p, x.remainder);
|
constexpr pair(char c) : dd{c, '\0'} { } /// NOLINT(google-explicit-constructor)
|
||||||
return p;
|
constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } /// NOLINT(google-explicit-constructor)
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr struct
|
||||||
|
{
|
||||||
|
pair dd[100]{
|
||||||
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, //
|
||||||
|
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
|
||||||
|
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
|
||||||
|
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
|
||||||
|
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
|
||||||
|
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
|
||||||
|
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
|
||||||
|
70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
|
||||||
|
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
|
||||||
|
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
|
||||||
|
};
|
||||||
|
pair fd[100]{
|
||||||
|
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
|
||||||
|
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
|
||||||
|
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
|
||||||
|
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
|
||||||
|
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
|
||||||
|
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
|
||||||
|
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
|
||||||
|
70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
|
||||||
|
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
|
||||||
|
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
|
||||||
|
};
|
||||||
|
} digits;
|
||||||
|
|
||||||
|
constexpr UInt64 mask24 = (UInt64(1) << 24) - 1;
|
||||||
|
constexpr UInt64 mask32 = (UInt64(1) << 32) - 1;
|
||||||
|
constexpr UInt64 mask57 = (UInt64(1) << 57) - 1;
|
||||||
|
|
||||||
|
template <bool, class, class F>
|
||||||
|
struct _cond
|
||||||
|
{
|
||||||
|
using type = F;
|
||||||
|
};
|
||||||
|
template <class T, class F>
|
||||||
|
struct _cond<true, T, F>
|
||||||
|
{
|
||||||
|
using type = T;
|
||||||
|
};
|
||||||
|
template <bool B, class T, class F>
|
||||||
|
using cond = typename _cond<B, T, F>::type;
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i)
|
||||||
|
{
|
||||||
|
constexpr auto q = sizeof(T);
|
||||||
|
using U = cond<q == 1, char8_t, cond<q <= sizeof(UInt16), UInt16, cond<q <= sizeof(UInt32), UInt32, UInt64>>>;
|
||||||
|
|
||||||
|
// convert bool to int before test with unary + to silence warning if T happens to be bool
|
||||||
|
U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i);
|
||||||
|
|
||||||
|
if (n < U(1e2))
|
||||||
|
{
|
||||||
|
/// This is changed from the original jeaiii implementation
|
||||||
|
/// For small numbers the extra branch to call outOneDigit() is worth it as it saves some instructions
|
||||||
|
/// and a memory access (no need to read digits.fd[n])
|
||||||
|
/// This is not true for pure random numbers, but that's not the common use case of a database
|
||||||
|
/// Original jeaii code
|
||||||
|
// *reinterpret_cast<pair *>(b) = digits.fd[n];
|
||||||
|
// return n < 10 ? b + 1 : b + 2;
|
||||||
|
return n < 10 ? outOneDigit(b, n) : outTwoDigits(b, n);
|
||||||
|
}
|
||||||
|
if (n < UInt32(1e6))
|
||||||
|
{
|
||||||
|
if (sizeof(U) == 1 || n < U(1e4))
|
||||||
|
{
|
||||||
|
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n;
|
||||||
|
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
|
||||||
|
if constexpr (sizeof(U) == 1)
|
||||||
|
b -= 1;
|
||||||
|
else
|
||||||
|
b -= n < U(1e3);
|
||||||
|
auto f2 = (f0 & mask24) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
|
||||||
|
return b + 4;
|
||||||
|
}
|
||||||
|
auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n;
|
||||||
|
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
|
||||||
|
if constexpr (sizeof(U) == 2)
|
||||||
|
b -= 1;
|
||||||
|
else
|
||||||
|
b -= n < U(1e5);
|
||||||
|
auto f2 = (f0 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||||
|
auto f4 = (f2 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||||
|
return b + 6;
|
||||||
|
}
|
||||||
|
if (sizeof(U) == 4 || n < UInt64(1ull << 32ull))
|
||||||
|
{
|
||||||
|
if (n < U(1e8))
|
||||||
|
{
|
||||||
|
auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16;
|
||||||
|
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
|
||||||
|
b -= n < U(1e7);
|
||||||
|
auto f2 = (f0 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||||
|
auto f4 = (f2 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||||
|
auto f6 = (f4 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
|
||||||
|
return b + 8;
|
||||||
|
}
|
||||||
|
auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * n;
|
||||||
|
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
|
||||||
|
b -= n < UInt32(1e9);
|
||||||
|
auto f2 = (f0 & mask57) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
|
||||||
|
auto f4 = (f2 & mask57) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
|
||||||
|
auto f6 = (f4 & mask57) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
|
||||||
|
auto f8 = (f6 & mask57) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
|
||||||
|
return b + 10;
|
||||||
}
|
}
|
||||||
|
|
||||||
// "u" is less than 10^2*N
|
// if we get here U must be UInt64 but some compilers don't know that, so reassign n to a UInt64 to avoid warnings
|
||||||
template <typename UInt, size_t N>
|
UInt32 z = n % UInt32(1e8);
|
||||||
ALWAYS_INLINE inline char * head(char * p, UInt u)
|
UInt64 u = n / UInt32(1e8);
|
||||||
|
|
||||||
|
if (u < UInt32(1e2))
|
||||||
{
|
{
|
||||||
return u < pow10<UnsignedOfSize<N>>(N) ? head(p, UnsignedOfSize<N / 2>(u)) : head<N>(p, split<N>(u));
|
// u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number)
|
||||||
|
*reinterpret_cast<pair *>(b) = digits.dd[u];
|
||||||
|
b += 2;
|
||||||
}
|
}
|
||||||
|
else if (u < UInt32(1e6))
|
||||||
// recursion base case, selected when "u" is one byte
|
|
||||||
template <>
|
|
||||||
ALWAYS_INLINE inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
|
||||||
{
|
{
|
||||||
return u < 10 ? outDigit(p, u) : outTwoDigits(p, u);
|
if (u < UInt32(1e4))
|
||||||
|
{
|
||||||
|
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
|
||||||
|
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
|
||||||
|
b -= u < UInt32(1e3);
|
||||||
|
auto f2 = (f0 & mask24) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
|
||||||
|
b += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------===//
|
|
||||||
// tail: produce all digits including leading zeros
|
|
||||||
//===----------------------------------------------------------===//
|
|
||||||
|
|
||||||
// recursive step, "u" is less than 10^2*N
|
|
||||||
template <typename UInt, size_t N>
|
|
||||||
ALWAYS_INLINE inline char * tail(char * p, UInt u)
|
|
||||||
{
|
|
||||||
QuotientAndRemainder<N> x = split<N>(u);
|
|
||||||
p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
|
|
||||||
p = tail(p, x.remainder);
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
// recursion base case, selected when "u" is one byte
|
|
||||||
template <>
|
|
||||||
ALWAYS_INLINE inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
|
||||||
{
|
|
||||||
return outTwoDigits(p, u);
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------===//
|
|
||||||
// large values are >= 10^2*N
|
|
||||||
// where x contains quotient and remainder after division by 10^N
|
|
||||||
//===----------------------------------------------------------===//
|
|
||||||
template <size_t N>
|
|
||||||
ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder<N> x)
|
|
||||||
{
|
|
||||||
QuotientAndRemainder<N> y = split<N>(x.quotient);
|
|
||||||
p = head(p, UnsignedOfSize<N / 2>(y.quotient));
|
|
||||||
p = tail(p, y.remainder);
|
|
||||||
p = tail(p, x.remainder);
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------===//
|
|
||||||
// handle values of "u" that might be >= 10^2*N
|
|
||||||
// where N is the size of "u" in bytes
|
|
||||||
//===----------------------------------------------------------===//
|
|
||||||
template <typename UInt, size_t N = sizeof(UInt)>
|
|
||||||
ALWAYS_INLINE inline char * uitoa(char * p, UInt u)
|
|
||||||
{
|
|
||||||
if (u < pow10<UnsignedOfSize<N>>(N))
|
|
||||||
return head(p, UnsignedOfSize<N / 2>(u));
|
|
||||||
QuotientAndRemainder<N> x = split<N>(u);
|
|
||||||
|
|
||||||
return u < pow10<UnsignedOfSize<N>>(2 * N) ? head<N>(p, x) : large<N>(p, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
// selected when "u" is one byte
|
|
||||||
template <>
|
|
||||||
ALWAYS_INLINE inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
|
||||||
{
|
|
||||||
if (u < 10)
|
|
||||||
return outDigit(p, u);
|
|
||||||
else if (u < 100)
|
|
||||||
return outTwoDigits(p, u);
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
p = outDigit(p, u / 100);
|
auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * u;
|
||||||
p = outTwoDigits(p, u % 100);
|
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
|
||||||
return p;
|
b -= u < UInt32(1e5);
|
||||||
|
auto f2 = (f0 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||||
|
auto f4 = (f2 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||||
|
b += 6;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (u < UInt32(1e8))
|
||||||
//===----------------------------------------------------------===//
|
|
||||||
// handle unsigned and signed integral operands
|
|
||||||
//===----------------------------------------------------------===//
|
|
||||||
|
|
||||||
// itoa: handle unsigned integral operands (selected by SFINAE)
|
|
||||||
template <typename U>
|
|
||||||
requires(!std::is_signed_v<U> && std::is_integral_v<U>)
|
|
||||||
ALWAYS_INLINE inline char * itoa(U u, char * p)
|
|
||||||
{
|
{
|
||||||
return convert::uitoa(p, u);
|
auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16;
|
||||||
|
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
|
||||||
|
b -= u < UInt32(1e7);
|
||||||
|
auto f2 = (f0 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||||
|
auto f4 = (f2 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||||
|
auto f6 = (f4 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
|
||||||
|
b += 8;
|
||||||
}
|
}
|
||||||
|
else if (u < UInt64(1ull << 32ull))
|
||||||
// itoa: handle signed integral operands (selected by SFINAE)
|
|
||||||
template <typename I, size_t N = sizeof(I)>
|
|
||||||
requires(std::is_signed_v<I> && std::is_integral_v<I>)
|
|
||||||
ALWAYS_INLINE inline char * itoa(I i, char * p)
|
|
||||||
{
|
{
|
||||||
// Need "mask" to be filled with a copy of the sign bit.
|
auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * u;
|
||||||
// If "i" is a negative value, then the result of "operator >>"
|
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
|
||||||
// is implementation-defined, though usually it is an arithmetic
|
b -= u < UInt32(1e9);
|
||||||
// right shift that replicates the sign bit.
|
auto f2 = (f0 & mask57) * 100;
|
||||||
// Use a conditional expression to be portable,
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
|
||||||
// a good optimizing compiler generates an arithmetic right shift
|
auto f4 = (f2 & mask57) * 100;
|
||||||
// and avoids the conditional branch.
|
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
|
||||||
UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
|
auto f6 = (f4 & mask57) * 100;
|
||||||
// Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
|
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
|
||||||
// Cannot use std::abs() because the result is undefined
|
auto f8 = (f6 & mask57) * 100;
|
||||||
// in 2's complement systems for the most-negative value.
|
*reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
|
||||||
// Want to avoid conditional branch for performance reasons since
|
b += 10;
|
||||||
// CPU branch prediction will be ineffective when negative values
|
}
|
||||||
// occur randomly.
|
else
|
||||||
// Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
|
{
|
||||||
// Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
|
UInt32 y = u % UInt32(1e8);
|
||||||
// This yields the absolute value with the desired type without
|
u /= UInt32(1e8);
|
||||||
// using a conditional branch and without invoking undefined or
|
|
||||||
// implementation defined behavior:
|
// u is 2, 3, or 4 digits (if u < 10 it would have been handled above)
|
||||||
UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
|
if (u < UInt32(1e2))
|
||||||
// Unconditionally store a minus sign when producing digits
|
{
|
||||||
// in a forward direction and increment the pointer only if
|
*reinterpret_cast<pair *>(b) = digits.dd[u];
|
||||||
// the value is in fact negative.
|
b += 2;
|
||||||
// This avoids a conditional branch and is safe because we will
|
}
|
||||||
// always produce at least one digit and it will overwrite the
|
else
|
||||||
// minus sign when the value is not negative.
|
{
|
||||||
*p = '-';
|
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
|
||||||
p += (mask & 1);
|
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
|
||||||
p = convert::uitoa(p, u);
|
b -= u < UInt32(1e3);
|
||||||
return p;
|
auto f2 = (f0 & mask24) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
|
||||||
|
b += 4;
|
||||||
|
}
|
||||||
|
// do 8 digits
|
||||||
|
auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1;
|
||||||
|
*reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
|
||||||
|
auto f2 = (f0 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||||
|
auto f4 = (f2 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||||
|
auto f6 = (f4 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
|
||||||
|
b += 8;
|
||||||
|
}
|
||||||
|
// do 8 digits
|
||||||
|
auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1;
|
||||||
|
*reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
|
||||||
|
auto f2 = (f0 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||||
|
auto f4 = (f2 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||||
|
auto f6 = (f4 & mask32) * 100;
|
||||||
|
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
|
||||||
|
return b + 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -303,7 +296,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
|
|||||||
{
|
{
|
||||||
/// If we the highest 64bit item is empty, we can print just the lowest item as u64
|
/// If we the highest 64bit item is empty, we can print just the lowest item as u64
|
||||||
if (_x.items[UInt128::_impl::little(1)] == 0)
|
if (_x.items[UInt128::_impl::little(1)] == 0)
|
||||||
return convert::itoa(_x.items[UInt128::_impl::little(0)], p);
|
return jeaiii::to_text_from_integer(p, _x.items[UInt128::_impl::little(0)]);
|
||||||
|
|
||||||
/// Doing operations using __int128 is faster and we already rely on this feature
|
/// Doing operations using __int128 is faster and we already rely on this feature
|
||||||
using T = unsigned __int128;
|
using T = unsigned __int128;
|
||||||
@ -334,7 +327,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
|
|||||||
current_block += max_multiple_of_hundred_blocks;
|
current_block += max_multiple_of_hundred_blocks;
|
||||||
}
|
}
|
||||||
|
|
||||||
char * highest_part_print = convert::itoa(uint64_t(x), p);
|
char * highest_part_print = jeaiii::to_text_from_integer(p, uint64_t(x));
|
||||||
for (int i = 0; i < current_block; i++)
|
for (int i = 0; i < current_block; i++)
|
||||||
{
|
{
|
||||||
outTwoDigits(highest_part_print, two_values[current_block - 1 - i]);
|
outTwoDigits(highest_part_print, two_values[current_block - 1 - i]);
|
||||||
@ -450,12 +443,12 @@ ALWAYS_INLINE inline char * writeSIntText(T x, char * pos)
|
|||||||
|
|
||||||
char * itoa(UInt8 i, char * p)
|
char * itoa(UInt8 i, char * p)
|
||||||
{
|
{
|
||||||
return convert::itoa(uint8_t(i), p);
|
return jeaiii::to_text_from_integer(p, uint8_t(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
char * itoa(Int8 i, char * p)
|
char * itoa(Int8 i, char * p)
|
||||||
{
|
{
|
||||||
return convert::itoa(int8_t(i), p);
|
return jeaiii::to_text_from_integer(p, int8_t(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
char * itoa(UInt128 i, char * p)
|
char * itoa(UInt128 i, char * p)
|
||||||
@ -481,7 +474,7 @@ char * itoa(Int256 i, char * p)
|
|||||||
#define DEFAULT_ITOA(T) \
|
#define DEFAULT_ITOA(T) \
|
||||||
char * itoa(T i, char * p) \
|
char * itoa(T i, char * p) \
|
||||||
{ \
|
{ \
|
||||||
return convert::itoa(i, p); \
|
return jeaiii::to_text_from_integer(p, i); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define FOR_MISSING_INTEGER_TYPES(M) \
|
#define FOR_MISSING_INTEGER_TYPES(M) \
|
||||||
|
Loading…
Reference in New Issue
Block a user