Merge branch 'master' into try-disabling-jemalloc-background-threads

This commit is contained in:
Antonio Andelic 2024-07-04 13:58:26 +02:00
commit 613ed1ebbf
138 changed files with 1945 additions and 1548 deletions

1
.gitattributes vendored
View File

@ -2,3 +2,4 @@ contrib/* linguist-vendored
*.h linguist-language=C++
tests/queries/0_stateless/data_json/* binary
tests/queries/0_stateless/*.reference -crlf
src/Core/SettingsChangesHistory.cpp merge=union

View File

@ -1,32 +1,3 @@
// Based on https://github.com/amdn/itoa and combined with our optimizations
//
//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-//
//
// The MIT License (MIT)
// Copyright (c) 2016 Arturo Martin-de-Nicolas
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//===----------------------------------------------------------------------===//
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <base/defines.h>
#include <base/extended_types.h>
@ -34,99 +5,15 @@
namespace
{
template <typename T>
ALWAYS_INLINE inline constexpr T pow10(size_t x)
{
return x ? 10 * pow10<T>(x - 1) : 1;
}
// Division by a power of 10 is implemented using a multiplicative inverse.
// This strength reduction is also done by optimizing compilers, but
// presently the fastest results are produced by using the values
// for the multiplication and the shift as given by the algorithm
// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
//
// http://www.agner.org/optimize/optimizing_assembly.pdf
//
// "Integer division by a constant (all processors)
// A floating point number can be divided by a constant by multiplying
// with the reciprocal. If we want to do the same with integers, we have
// to scale the reciprocal by 2n and then shift the product to the right
// by n. There are various algorithms for finding a suitable value of n
// and compensating for rounding errors. The algorithm described below
// was invented by Terje Mathisen, Norway, and not published elsewhere."
/// Division by constant is performed by:
/// 1. Adding 1 if needed;
/// 2. Multiplying by another constant;
/// 3. Shifting right by another constant.
template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
struct Division
{
static constexpr bool add{add_};
static constexpr UInt multiplier{multiplier_};
static constexpr unsigned shift{shift_};
};
/// Select a type with appropriate number of bytes from the list of types.
/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
template <size_t N, typename T, typename... Ts>
struct SelectType
{
using Result = typename SelectType<N / 2, Ts...>::Result;
};
template <typename T, typename... Ts>
struct SelectType<1, T, Ts...>
{
using Result = T;
};
/// Division by 10^N where N is the size of the type.
template <size_t N>
using DivisionBy10PowN = typename SelectType<
N,
Division<uint8_t, false, 205U, 11>, /// divide by 10
Division<uint16_t, true, 41943U, 22>, /// divide by 100
Division<uint32_t, false, 3518437209U, 45>, /// divide by 10000
Division<uint64_t, false, 12379400392853802749ULL, 90> /// divide by 100000000
>::Result;
template <size_t N>
using UnsignedOfSize = typename SelectType<N, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>::Result;
/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
template <size_t N>
struct QuotientAndRemainder
{
UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
};
template <size_t N>
QuotientAndRemainder<N> inline split(UnsignedOfSize<N> value)
{
constexpr DivisionBy10PowN<N> division;
UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
return {quotient, remainder};
}
ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value)
ALWAYS_INLINE inline char * outOneDigit(char * p, uint8_t value)
{
*p = '0' + value;
++p;
return p;
return p + 1;
}
// Using a lookup table to convert binary numbers from 0 to 99
// into ascii characters as described by Andrei Alexandrescu in
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
const char digits[201] = "00010203040506070809"
"10111213141516171819"
"20212223242526272829"
@ -137,7 +24,6 @@ const char digits[201] = "00010203040506070809"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
{
memcpy(p, &digits[value * 2], 2);
@ -145,153 +31,260 @@ ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
return p;
}
namespace convert
namespace jeaiii
{
template <typename UInt, size_t N = sizeof(UInt)>
char * head(char * p, UInt u);
template <typename UInt, size_t N = sizeof(UInt)>
char * tail(char * p, UInt u);
/*
MIT License
//===----------------------------------------------------------===//
// head: find most significant digit, skip leading zeros
//===----------------------------------------------------------===//
Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa
// "x" contains quotient and remainder after division by 10^N
// quotient is less than 10^N
template <size_t N>
ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder<N> x)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
struct pair
{
p = head(p, UnsignedOfSize<N / 2>(x.quotient));
p = tail(p, x.remainder);
return p;
}
char dd[2];
constexpr pair(char c) : dd{c, '\0'} { } /// NOLINT(google-explicit-constructor)
constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } /// NOLINT(google-explicit-constructor)
};
// "u" is less than 10^2*N
template <typename UInt, size_t N>
ALWAYS_INLINE inline char * head(char * p, UInt u)
constexpr struct
{
return u < pow10<UnsignedOfSize<N>>(N) ? head(p, UnsignedOfSize<N / 2>(u)) : head<N>(p, split<N>(u));
}
pair dd[100]{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, //
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
};
pair fd[100]{
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
};
} digits;
// recursion base case, selected when "u" is one byte
template <>
ALWAYS_INLINE inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
constexpr UInt64 mask24 = (UInt64(1) << 24) - 1;
constexpr UInt64 mask32 = (UInt64(1) << 32) - 1;
constexpr UInt64 mask57 = (UInt64(1) << 57) - 1;
template <bool, class, class F>
struct _cond
{
return u < 10 ? outDigit(p, u) : outTwoDigits(p, u);
}
//===----------------------------------------------------------===//
// tail: produce all digits including leading zeros
//===----------------------------------------------------------===//
// recursive step, "u" is less than 10^2*N
template <typename UInt, size_t N>
ALWAYS_INLINE inline char * tail(char * p, UInt u)
using type = F;
};
template <class T, class F>
struct _cond<true, T, F>
{
QuotientAndRemainder<N> x = split<N>(u);
p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
p = tail(p, x.remainder);
return p;
}
using type = T;
};
template <bool B, class T, class F>
using cond = typename _cond<B, T, F>::type;
// recursion base case, selected when "u" is one byte
template <>
ALWAYS_INLINE inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
template <class T>
inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i)
{
return outTwoDigits(p, u);
}
constexpr auto q = sizeof(T);
using U = cond<q == 1, char8_t, cond<q <= sizeof(UInt16), UInt16, cond<q <= sizeof(UInt32), UInt32, UInt64>>>;
//===----------------------------------------------------------===//
// large values are >= 10^2*N
// where x contains quotient and remainder after division by 10^N
//===----------------------------------------------------------===//
template <size_t N>
ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder<N> x)
{
QuotientAndRemainder<N> y = split<N>(x.quotient);
p = head(p, UnsignedOfSize<N / 2>(y.quotient));
p = tail(p, y.remainder);
p = tail(p, x.remainder);
return p;
}
// convert bool to int before test with unary + to silence warning if T happens to be bool
U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i);
//===----------------------------------------------------------===//
// handle values of "u" that might be >= 10^2*N
// where N is the size of "u" in bytes
//===----------------------------------------------------------===//
template <typename UInt, size_t N = sizeof(UInt)>
ALWAYS_INLINE inline char * uitoa(char * p, UInt u)
{
if (u < pow10<UnsignedOfSize<N>>(N))
return head(p, UnsignedOfSize<N / 2>(u));
QuotientAndRemainder<N> x = split<N>(u);
if (n < U(1e2))
{
/// This is changed from the original jeaiii implementation
/// For small numbers the extra branch to call outOneDigit() is worth it as it saves some instructions
/// and a memory access (no need to read digits.fd[n])
/// This is not true for pure random numbers, but that's not the common use case of a database
/// Original jeaii code
// *reinterpret_cast<pair *>(b) = digits.fd[n];
// return n < 10 ? b + 1 : b + 2;
return n < 10 ? outOneDigit(b, n) : outTwoDigits(b, n);
}
if (n < UInt32(1e6))
{
if (sizeof(U) == 1 || n < U(1e4))
{
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
if constexpr (sizeof(U) == 1)
b -= 1;
else
b -= n < U(1e3);
auto f2 = (f0 & mask24) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
return b + 4;
}
auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
if constexpr (sizeof(U) == 2)
b -= 1;
else
b -= n < U(1e5);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
return b + 6;
}
if (sizeof(U) == 4 || n < UInt64(1ull << 32ull))
{
if (n < U(1e8))
{
auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
b -= n < U(1e7);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
return b + 8;
}
auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * n;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
b -= n < UInt32(1e9);
auto f2 = (f0 & mask57) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
auto f4 = (f2 & mask57) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
auto f6 = (f4 & mask57) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
auto f8 = (f6 & mask57) * 100;
*reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
return b + 10;
}
return u < pow10<UnsignedOfSize<N>>(2 * N) ? head<N>(p, x) : large<N>(p, x);
}
// if we get here U must be UInt64 but some compilers don't know that, so reassign n to a UInt64 to avoid warnings
UInt32 z = n % UInt32(1e8);
UInt64 u = n / UInt32(1e8);
// selected when "u" is one byte
template <>
ALWAYS_INLINE inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
if (u < 10)
return outDigit(p, u);
else if (u < 100)
return outTwoDigits(p, u);
if (u < UInt32(1e2))
{
// u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number)
*reinterpret_cast<pair *>(b) = digits.dd[u];
b += 2;
}
else if (u < UInt32(1e6))
{
if (u < UInt32(1e4))
{
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
b -= u < UInt32(1e3);
auto f2 = (f0 & mask24) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
b += 4;
}
else
{
auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
b -= u < UInt32(1e5);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
b += 6;
}
}
else if (u < UInt32(1e8))
{
auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
b -= u < UInt32(1e7);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
b += 8;
}
else if (u < UInt64(1ull << 32ull))
{
auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
b -= u < UInt32(1e9);
auto f2 = (f0 & mask57) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
auto f4 = (f2 & mask57) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
auto f6 = (f4 & mask57) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
auto f8 = (f6 & mask57) * 100;
*reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
b += 10;
}
else
{
p = outDigit(p, u / 100);
p = outTwoDigits(p, u % 100);
return p;
UInt32 y = u % UInt32(1e8);
u /= UInt32(1e8);
// u is 2, 3, or 4 digits (if u < 10 it would have been handled above)
if (u < UInt32(1e2))
{
*reinterpret_cast<pair *>(b) = digits.dd[u];
b += 2;
}
else
{
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
b -= u < UInt32(1e3);
auto f2 = (f0 & mask24) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
b += 4;
}
// do 8 digits
auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1;
*reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
b += 8;
}
}
//===----------------------------------------------------------===//
// handle unsigned and signed integral operands
//===----------------------------------------------------------===//
// itoa: handle unsigned integral operands (selected by SFINAE)
template <typename U>
requires(!std::is_signed_v<U> && std::is_integral_v<U>)
ALWAYS_INLINE inline char * itoa(U u, char * p)
{
return convert::uitoa(p, u);
}
// itoa: handle signed integral operands (selected by SFINAE)
template <typename I, size_t N = sizeof(I)>
requires(std::is_signed_v<I> && std::is_integral_v<I>)
ALWAYS_INLINE inline char * itoa(I i, char * p)
{
// Need "mask" to be filled with a copy of the sign bit.
// If "i" is a negative value, then the result of "operator >>"
// is implementation-defined, though usually it is an arithmetic
// right shift that replicates the sign bit.
// Use a conditional expression to be portable,
// a good optimizing compiler generates an arithmetic right shift
// and avoids the conditional branch.
UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
// Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
// Cannot use std::abs() because the result is undefined
// in 2's complement systems for the most-negative value.
// Want to avoid conditional branch for performance reasons since
// CPU branch prediction will be ineffective when negative values
// occur randomly.
// Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
// Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
// This yields the absolute value with the desired type without
// using a conditional branch and without invoking undefined or
// implementation defined behavior:
UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
// Unconditionally store a minus sign when producing digits
// in a forward direction and increment the pointer only if
// the value is in fact negative.
// This avoids a conditional branch and is safe because we will
// always produce at least one digit and it will overwrite the
// minus sign when the value is not negative.
*p = '-';
p += (mask & 1);
p = convert::uitoa(p, u);
return p;
// do 8 digits
auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1;
*reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
return b + 8;
}
}
@ -303,7 +296,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
{
/// If we the highest 64bit item is empty, we can print just the lowest item as u64
if (_x.items[UInt128::_impl::little(1)] == 0)
return convert::itoa(_x.items[UInt128::_impl::little(0)], p);
return jeaiii::to_text_from_integer(p, _x.items[UInt128::_impl::little(0)]);
/// Doing operations using __int128 is faster and we already rely on this feature
using T = unsigned __int128;
@ -334,7 +327,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
current_block += max_multiple_of_hundred_blocks;
}
char * highest_part_print = convert::itoa(uint64_t(x), p);
char * highest_part_print = jeaiii::to_text_from_integer(p, uint64_t(x));
for (int i = 0; i < current_block; i++)
{
outTwoDigits(highest_part_print, two_values[current_block - 1 - i]);
@ -450,12 +443,12 @@ ALWAYS_INLINE inline char * writeSIntText(T x, char * pos)
char * itoa(UInt8 i, char * p)
{
return convert::itoa(uint8_t(i), p);
return jeaiii::to_text_from_integer(p, uint8_t(i));
}
char * itoa(Int8 i, char * p)
{
return convert::itoa(int8_t(i), p);
return jeaiii::to_text_from_integer(p, int8_t(i));
}
char * itoa(UInt128 i, char * p)
@ -481,7 +474,7 @@ char * itoa(Int256 i, char * p)
#define DEFAULT_ITOA(T) \
char * itoa(T i, char * p) \
{ \
return convert::itoa(i, p); \
return jeaiii::to_text_from_integer(p, i); \
}
#define FOR_MISSING_INTEGER_TYPES(M) \

@ -1 +1 @@
Subproject commit d2142eed98046a47ff7112e3cc1e197c8a5cd80f
Subproject commit 2a8967b60cbe5bc2df253712bac343cc5263c5fc

2
contrib/openssl vendored

@ -1 +1 @@
Subproject commit 5d81fa7068fc8c07f4d0997d5b703f3c541a637c
Subproject commit ee2bb8513b28bf86b35404dd17a0e29305ca9e08

2
contrib/sysroot vendored

@ -1 +1 @@
Subproject commit 39c4713334f9f156dbf508f548d510d9129a657c
Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8

2
contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit 38431d111781843741a781a57a6381a527d900a4
Subproject commit 4918f81ea3d1abd18905bac9876d4a1fe2ebdf07

View File

@ -993,11 +993,11 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
- `TDigest`
Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.
[TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. the 90th percentile) for numeric columns.
- `Uniq`
Estimate the number of distinct values of a column by HyperLogLog.
[HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains.
## Column-level Settings {#column-level-settings}

View File

@ -2536,7 +2536,7 @@ Possible values:
- 0 — Optimization disabled.
- 1 — Optimization enabled.
Default value: `0`.
Default value: `1`.
## optimize_trivial_count_query {#optimize-trivial-count-query}

View File

@ -28,6 +28,6 @@ There is an example adding two statistics types to two columns:
ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq;
```
:::note
:::note
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
:::

View File

@ -2077,7 +2077,7 @@ SELECT * FROM test_table
- 0 — оптимизация отключена.
- 1 — оптимизация включена.
Значение по умолчанию: `0`.
Значение по умолчанию: `1`.
## optimize_trivial_count_query {#optimize-trivial-count-query}

View File

@ -134,10 +134,6 @@
# include <Server/KeeperTCPHandlerFactory.h>
#endif
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#endif
#if USE_AZURE_BLOB_STORAGE
# include <azure/storage/common/internal/xml_wrapper.hpp>
# include <azure/core/diagnostics/logger.hpp>
@ -177,34 +173,10 @@ namespace ProfileEvents
namespace fs = std::filesystem;
#if USE_JEMALLOC
static bool jemallocOptionEnabled(const char *name)
{
bool value;
size_t size = sizeof(value);
if (mallctl(name, reinterpret_cast<void *>(&value), &size, /* newp= */ nullptr, /* newlen= */ 0))
throw Poco::SystemException("mallctl() failed");
return value;
}
#else
static bool jemallocOptionEnabled(const char *) { return false; }
#endif
int mainEntryClickHouseServer(int argc, char ** argv)
{
DB::Server app;
if (jemallocOptionEnabled("opt.background_thread"))
{
LOG_ERROR(&app.logger(),
"jemalloc.background_thread was requested, "
"however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, "
"and also background_thread is not compatible with ClickHouse watchdog "
"(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)");
}
/// Do not fork separate process from watchdog if we attached to terminal.
/// Otherwise it breaks gdb usage.
/// Can be overridden by environment variable (cannot use server config at this moment).

View File

@ -506,6 +506,14 @@ let user = 'default';
let password = '';
let add_http_cors_header = (location.protocol != 'file:');
const current_url = new URL(window.location);
/// Substitute user name if it's specified in the query string
const user_from_url = current_url.searchParams.get('user');
if (user_from_url) {
user = user_from_url;
}
const errorCodeMessageMap = {
516: 'Error authenticating with database. Please check your connection params and try again.'
}

View File

@ -9,6 +9,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -164,32 +165,15 @@ private:
auto aggregate_function_clone = aggregate_function->clone();
auto & aggregate_function_clone_typed = aggregate_function_clone->as<FunctionNode &>();
aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument };
resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name);
resolveAggregateFunctionNodeByName(aggregate_function_clone_typed, result_aggregate_function_name);
arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone);
resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName());
resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext());
return arithmetic_function_clone;
}
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
{
auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get(
aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
};
}

View File

@ -11,6 +11,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -18,19 +19,18 @@ namespace DB
namespace
{
class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor<ComparisonTupleEliminationPassVisitor>
class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitorWithContext<ComparisonTupleEliminationPassVisitor>
{
public:
explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_)
: context(std::move(context_))
{}
using Base = InDepthQueryTreeVisitorWithContext<ComparisonTupleEliminationPassVisitor>;
using Base::Base;
static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child)
{
return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION;
}
void visitImpl(QueryTreeNodePtr & node) const
void enterImpl(QueryTreeNodePtr & node) const
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
@ -171,13 +171,13 @@ private:
{
auto result_function = std::make_shared<FunctionNode>("and");
result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions);
resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName());
resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), getContext());
if (comparison_function_name == "notEquals")
{
auto not_function = std::make_shared<FunctionNode>("not");
not_function->getArguments().getNodes().push_back(std::move(result_function));
resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName());
resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), getContext());
result_function = std::move(not_function);
}
@ -197,18 +197,10 @@ private:
comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument));
comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument));
resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName());
resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), getContext());
return comparison_function;
}
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, context);
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
ContextPtr context;
};
}

View File

@ -9,6 +9,7 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -77,11 +78,9 @@ public:
/// Replace `countDistinct` of initial query into `count`
auto result_type = function_node->getResultType();
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
}
};

View File

@ -4,6 +4,7 @@
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeVariant.h>
#include <Storages/IStorage.h>
@ -16,6 +17,9 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/TableNode.h>
#include <Analyzer/TableFunctionNode.h>
#include <Analyzer/Utils.h>
#include <Analyzer/JoinNode.h>
namespace DB
{
@ -23,202 +27,410 @@ namespace DB
namespace
{
class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>
struct ColumnContext
{
NameAndTypePair column;
QueryTreeNodePtr column_source;
ContextPtr context;
};
using NodeToSubcolumnTransformer = std::function<void(QueryTreeNodePtr &, FunctionNode &, ColumnContext &)>;
void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx)
{
/// Replace `length(argument)` with `argument.size0`
/// `argument` may be Array or Map.
NameAndTypePair column{ctx.column.name + ".size0", std::make_shared<DataTypeUInt64>()};
node = std::make_shared<ColumnNode>(column, ctx.column_source);
}
template <bool positive>
void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive
/// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive
/// `argument` may be Array or Map.
NameAndTypePair column{ctx.column.name + ".size0", std::make_shared<DataTypeUInt64>()};
auto & function_arguments_nodes = function_node.getArguments().getNodes();
function_arguments_nodes.clear();
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, ctx.column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
const auto * function_name = positive ? "equals" : "notEquals";
resolveOrdinaryFunctionNodeByName(function_node, function_name, ctx.context);
}
String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple)
{
if (value.getType() == Field::Types::String)
return value.get<const String &>();
if (value.getType() == Field::Types::UInt64)
return data_type_tuple.getNameByPosition(value.get<UInt64>());
return "";
}
String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &)
{
if (value.getType() == Field::Types::String)
return value.get<const String &>();
return "";
}
template <typename DataType>
void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`.
/// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`.
auto & function_arguments_nodes = function_node.getArguments().getNodes();
if (function_arguments_nodes.size() != 2)
return;
const auto * second_argument_constant_node = function_arguments_nodes[1]->as<ConstantNode>();
if (!second_argument_constant_node)
return;
const auto & data_type_concrete = assert_cast<const DataType &>(*ctx.column.type);
auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete);
if (subcolumn_name.empty())
return;
NameAndTypePair column{ctx.column.name + "." + subcolumn_name, function_node.getResultType()};
node = std::make_shared<ColumnNode>(column, ctx.column_source);
}
std::map<std::pair<TypeIndex, String>, NodeToSubcolumnTransformer> node_transformers =
{
{
{TypeIndex::Array, "length"}, optimizeFunctionLength,
},
{
{TypeIndex::Array, "empty"}, optimizeFunctionEmpty<true>,
},
{
{TypeIndex::Array, "notEmpty"}, optimizeFunctionEmpty<false>,
},
{
{TypeIndex::Map, "length"}, optimizeFunctionLength,
},
{
{TypeIndex::Map, "empty"}, optimizeFunctionEmpty<true>,
},
{
{TypeIndex::Map, "notEmpty"}, optimizeFunctionEmpty<false>,
},
{
{TypeIndex::Map, "mapKeys"},
[](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `mapKeys(map_argument)` with `map_argument.keys`
NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()};
node = std::make_shared<ColumnNode>(column, ctx.column_source);
},
},
{
{TypeIndex::Map, "mapValues"},
[](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `mapValues(map_argument)` with `map_argument.values`
NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()};
node = std::make_shared<ColumnNode>(column, ctx.column_source);
},
},
{
{TypeIndex::Map, "mapContains"},
[](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
const auto & data_type_map = assert_cast<const DataTypeMap &>(*ctx.column.type);
NameAndTypePair column{ctx.column.name + ".keys", std::make_shared<DataTypeArray>(data_type_map.getKeyType())};
auto & function_arguments_nodes = function_node.getArguments().getNodes();
auto has_function_argument = std::make_shared<ColumnNode>(column, ctx.column_source);
function_arguments_nodes[0] = std::move(has_function_argument);
resolveOrdinaryFunctionNodeByName(function_node, "has", ctx.context);
},
},
{
{TypeIndex::Nullable, "count"},
[](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))`
NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
auto & function_arguments_nodes = function_node.getArguments().getNodes();
auto new_column_node = std::make_shared<ColumnNode>(column, ctx.column_source);
auto function_node_not = std::make_shared<FunctionNode>("not");
function_node_not->getArguments().getNodes().push_back(std::move(new_column_node));
resolveOrdinaryFunctionNodeByName(*function_node_not, "not", ctx.context);
function_arguments_nodes = {std::move(function_node_not)};
resolveAggregateFunctionNodeByName(function_node, "sum");
},
},
{
{TypeIndex::Nullable, "isNull"},
[](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx)
{
/// Replace `isNull(nullable_argument)` with `nullable_argument.null`
NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
node = std::make_shared<ColumnNode>(column, ctx.column_source);
},
},
{
{TypeIndex::Nullable, "isNotNull"},
[](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)`
NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
auto & function_arguments_nodes = function_node.getArguments().getNodes();
function_arguments_nodes = {std::make_shared<ColumnNode>(column, ctx.column_source)};
resolveOrdinaryFunctionNodeByName(function_node, "not", ctx.context);
},
},
{
{TypeIndex::Tuple, "tupleElement"}, optimizeTupleOrVariantElement<DataTypeTuple>,
},
{
{TypeIndex::Variant, "variantElement"}, optimizeTupleOrVariantElement<DataTypeVariant>,
},
};
std::tuple<FunctionNode *, ColumnNode *, TableNode *> getTypedNodesForOptimization(const QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return {};
auto & function_arguments_nodes = function_node->getArguments().getNodes();
if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2)
return {};
auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
if (!first_argument_column_node || first_argument_column_node->getColumnName() == "__grouping_set")
return {};
auto column_source = first_argument_column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return {};
const auto & storage = table_node->getStorage();
const auto & storage_snapshot = table_node->getStorageSnapshot();
auto column = first_argument_column_node->getColumn();
if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata))
return {};
auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name);
if (!column_in_table || !column_in_table->type->equals(*column.type))
return {};
return std::make_tuple(function_node, first_argument_column_node, table_node);
}
/// First pass collects info about identifiers to determine which identifiers are allowed to optimize.
class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>;
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>;
using Base::Base;
void enterImpl(const QueryTreeNodePtr & node)
{
if (!getSettings().optimize_functions_to_subcolumns)
return;
if (auto * table_node = node->as<TableNode>())
{
enterImpl(*table_node);
return;
}
if (auto * column_node = node->as<ColumnNode>())
{
enterImpl(*column_node);
return;
}
auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node);
if (function_node && first_argument_node && table_node)
{
enterImpl(*function_node, *first_argument_node, *table_node);
return;
}
if (const auto * join_node = node->as<JoinNode>())
{
can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().join_use_nulls;
return;
}
if (const auto * query_node = node->as<QueryNode>())
{
if (query_node->isGroupByWithCube() || query_node->isGroupByWithRollup() || query_node->isGroupByWithGroupingSets())
can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().group_by_use_nulls;
return;
}
}
std::unordered_set<Identifier> getIdentifiersToOptimize() const
{
if (can_wrap_result_columns_with_nullable)
{
/// Do not optimize if we have JOIN with setting join_use_null.
/// Do not optimize if we have GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with setting group_by_use_nulls.
/// It may change the behaviour if subcolumn can be converted
/// to Nullable while the original column cannot (e.g. for Array type).
return {};
}
/// Do not optimize if full column is requested in other context.
/// It doesn't make sense because it doesn't reduce amount of read data
/// and optimized functions are not computation heavy. But introducing
/// new identifier complicates query analysis and may break it.
///
/// E.g. query:
/// SELECT n FROM table GROUP BY n HAVING isNotNull(n)
/// may be optimized to incorrect query:
/// SELECT n FROM table GROUP BY n HAVING not(n.null)
/// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys)
///
/// Do not optimize index columns (primary, min-max, secondary),
/// because otherwise analysis of indexes may be broken.
/// TODO: handle subcolumns in index analysis.
std::unordered_set<Identifier> identifiers_to_optimize;
for (const auto & [identifier, count] : optimized_identifiers_count)
{
if (all_key_columns.contains(identifier))
continue;
auto it = identifiers_count.find(identifier);
if (it != identifiers_count.end() && it->second == count)
identifiers_to_optimize.insert(identifier);
}
return identifiers_to_optimize;
}
private:
std::unordered_set<Identifier> all_key_columns;
std::unordered_map<Identifier, UInt64> identifiers_count;
std::unordered_map<Identifier, UInt64> optimized_identifiers_count;
NameSet processed_tables;
bool can_wrap_result_columns_with_nullable = false;
void enterImpl(const TableNode & table_node)
{
auto table_name = table_node.getStorage()->getStorageID().getFullTableName();
if (processed_tables.emplace(table_name).second)
return;
auto add_key_columns = [&](const auto & key_columns)
{
for (const auto & column_name : key_columns)
{
Identifier identifier({table_name, column_name});
all_key_columns.insert(identifier);
}
};
const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata;
const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey();
const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey();
add_key_columns(primary_key_columns);
add_key_columns(partition_key_columns);
for (const auto & index : metadata_snapshot->getSecondaryIndices())
{
const auto & index_columns = index.expression->getRequiredColumns();
add_key_columns(index_columns);
}
}
void enterImpl(const ColumnNode & column_node)
{
if (column_node.getColumnName() == "__grouping_set")
return;
auto column_source = column_node.getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return;
auto table_name = table_node->getStorage()->getStorageID().getFullTableName();
Identifier qualified_name({table_name, column_node.getColumnName()});
++identifiers_count[qualified_name];
}
void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node)
{
/// For queries with FINAL converting function to subcolumn may alter
/// special merging algorithms and produce wrong result of query.
if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal())
return;
const auto & column = first_argument_column_node.getColumn();
auto table_name = table_node.getStorage()->getStorageID().getFullTableName();
Identifier qualified_name({table_name, column.name});
if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()}))
++optimized_identifiers_count[qualified_name];
}
};
/// Second pass optimizes functions to subcolumns for allowed identifiers.
class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>
{
private:
std::unordered_set<Identifier> identifiers_to_optimize;
public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>;
using Base::Base;
FunctionToSubcolumnsVisitorSecondPass(ContextPtr context_, std::unordered_set<Identifier> identifiers_to_optimize_)
: Base(std::move(context_)), identifiers_to_optimize(std::move(identifiers_to_optimize_))
{
}
void enterImpl(QueryTreeNodePtr & node) const
{
if (!getSettings().optimize_functions_to_subcolumns)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;
auto & function_arguments_nodes = function_node->getArguments().getNodes();
size_t function_arguments_nodes_size = function_arguments_nodes.size();
if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2)
return;
auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
if (!first_argument_column_node)
return;
if (first_argument_column_node->getColumnName() == "__grouping_set")
return;
auto column_source = first_argument_column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return;
const auto & storage = table_node->getStorage();
if (!storage->supportsSubcolumns())
auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node);
if (!function_node || !first_argument_column_node || !table_node)
return;
auto column = first_argument_column_node->getColumn();
WhichDataType column_type(column.type);
auto table_name = table_node->getStorage()->getStorageID().getFullTableName();
const auto & function_name = function_node->getFunctionName();
Identifier qualified_name({table_name, column.name});
if (!identifiers_to_optimize.contains(qualified_name))
return;
if (function_arguments_nodes_size == 1)
auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()});
if (transformer_it != node_transformers.end())
{
if (column_type.isArray())
{
if (function_name == "length")
{
/// Replace `length(array_argument)` with `array_argument.size0`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "empty")
{
/// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
function_arguments_nodes.clear();
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
resolveOrdinaryFunctionNode(*function_node, "equals");
}
else if (function_name == "notEmpty")
{
/// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
function_arguments_nodes.clear();
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
resolveOrdinaryFunctionNode(*function_node, "notEquals");
}
}
else if (column_type.isNullable())
{
if (function_name == "isNull")
{
/// Replace `isNull(nullable_argument)` with `nullable_argument.null`
column.name += ".null";
column.type = std::make_shared<DataTypeUInt8>();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "isNotNull")
{
/// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)`
column.name += ".null";
column.type = std::make_shared<DataTypeUInt8>();
function_arguments_nodes = {std::make_shared<ColumnNode>(column, column_source)};
resolveOrdinaryFunctionNode(*function_node, "not");
}
}
else if (column_type.isMap())
{
if (function_name == "mapKeys")
{
/// Replace `mapKeys(map_argument)` with `map_argument.keys`
column.name += ".keys";
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapValues")
{
/// Replace `mapValues(map_argument)` with `map_argument.values`
column.name += ".values";
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
}
ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), getContext()};
transformer_it->second(node, *function_node, ctx);
}
else
{
const auto * second_argument_constant_node = function_arguments_nodes[1]->as<ConstantNode>();
if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node)
{
/** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)`
* with `tuple_argument.column_name`.
*/
const auto & tuple_element_constant_value = second_argument_constant_node->getValue();
const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType();
const auto & data_type_tuple = assert_cast<const DataTypeTuple &>(*column.type);
String subcolumn_name;
if (tuple_element_constant_value_type == Field::Types::String)
{
subcolumn_name = tuple_element_constant_value.get<const String &>();
}
else if (tuple_element_constant_value_type == Field::Types::UInt64)
{
auto tuple_column_index = tuple_element_constant_value.get<UInt64>();
subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index);
}
else
{
return;
}
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node)
{
/// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`.
const auto & variant_element_constant_value = second_argument_constant_node->getValue();
String subcolumn_name;
if (variant_element_constant_value.getType() != Field::Types::String)
return;
subcolumn_name = variant_element_constant_value.get<const String &>();
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapContains" && column_type.isMap())
{
const auto & data_type_map = assert_cast<const DataTypeMap &>(*column.type);
/// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
column.name += ".keys";
column.type = std::make_shared<DataTypeArray>(data_type_map.getKeyType());
auto has_function_argument = std::make_shared<ColumnNode>(column, column_source);
function_arguments_nodes[0] = std::move(has_function_argument);
resolveOrdinaryFunctionNode(*function_node, "has");
}
}
}
private:
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
};
@ -226,8 +438,15 @@ private:
void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
FunctionToSubcolumnsVisitor visitor(context);
visitor.visit(query_tree_node);
FunctionToSubcolumnsVisitorFirstPass first_visitor(context);
first_visitor.visit(query_tree_node);
auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize();
if (identifiers_to_optimize.empty())
return;
FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize));
second_visitor.visit(query_tree_node);
}
}

View File

@ -6,6 +6,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Interpreters/Context.h>
#include <DataTypes/DataTypesNumber.h>
@ -47,25 +48,17 @@ public:
if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull())
{
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
}
else if (function_node->getFunctionName() == "sum" &&
first_argument_constant_literal.getType() == Field::Types::UInt64 &&
first_argument_constant_literal.get<UInt64>() == 1)
{
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
}
}
private:
static void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
};
}

View File

@ -5,6 +5,7 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Common/DateLUT.h>
#include <Common/DateLUTImpl.h>

View File

@ -74,8 +74,7 @@ public:
new_arguments[1] = std::move(if_arguments_nodes[0]);
function_arguments_nodes = std::move(new_arguments);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If");
}
}
else if (first_const_node)
@ -104,27 +103,10 @@ public:
new_arguments[1] = std::move(not_function);
function_arguments_nodes = std::move(new_arguments);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If");
}
}
}
private:
static void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types)
{
auto result_type = function_node.getResultType();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node.getFunctionName() + "If",
function_node.getNullsAction(),
argument_types,
function_node.getAggregateFunction()->getParameters(),
properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
};
}

View File

@ -73,23 +73,24 @@ public:
const auto lhs = std::make_shared<FunctionNode>("sum");
lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]);
resolveAsAggregateFunctionNode(*lhs, column_type);
resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName());
const auto rhs_count = std::make_shared<FunctionNode>("count");
rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]);
resolveAsAggregateFunctionNode(*rhs_count, column_type);
resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName());
const auto rhs = std::make_shared<FunctionNode>("multiply");
rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]);
rhs->getArguments().getNodes().push_back(rhs_count);
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext());
auto new_node = std::make_shared<FunctionNode>(Poco::toLower(func_plus_minus_node->getFunctionName()));
if (column_id == 0)
new_node->getArguments().getNodes() = {lhs, rhs};
else if (column_id == 1)
new_node->getArguments().getNodes() = {rhs, lhs};
resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName());
resolveOrdinaryFunctionNodeByName(*new_node, new_node->getFunctionName(), getContext());
if (!new_node)
return;
@ -100,28 +101,7 @@ public:
res = createCastFunction(res, function_node->getResultType(), getContext());
node = std::move(res);
}
private:
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
const auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
static void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type)
{
AggregateFunctionProperties properties;
const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(),
NullsAction::EMPTY,
{argument_type},
{},
properties);
function_node.resolveAsAggregateFunction(aggregate_function);
}
};
}

View File

@ -5,6 +5,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Analyzer/Utils.h>
#include <Functions/FunctionFactory.h>
@ -65,7 +66,8 @@ public:
auto multiplier_node = function_node_arguments_nodes[0];
function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]);
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
resolveAggregateFunctionNodeByName(*function_node, "countIf");
if (constant_value_literal.get<UInt64>() != 1)
{
@ -115,7 +117,7 @@ public:
function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0];
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
resolveAggregateFunctionNodeByName(*function_node, "countIf");
if (if_true_condition_value != 1)
{
@ -144,7 +146,7 @@ public:
function_node_arguments_nodes[0] = std::move(not_function);
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
resolveAggregateFunctionNodeByName(*function_node, "countIf");
if (if_false_condition_value != 1)
{
@ -156,15 +158,6 @@ public:
}
private:
static void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
"countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right)
{
auto multiply_function_node = std::make_shared<FunctionNode>("multiply");

View File

@ -7,6 +7,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB

View File

@ -7,6 +7,7 @@
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -184,11 +185,8 @@ public:
/// Replace uniq of initial query to count
if (match_subquery_with_distinct() || match_subquery_with_group_by())
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node->getArguments().getNodes().clear();
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
resolveAggregateFunctionNodeByName(*function_node, "count");
}
}
};

View File

@ -636,16 +636,16 @@ private:
bool has_function = false;
};
inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_node)
inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode & function_node, const String & function_name)
{
Array parameters;
for (const auto & param : function_node->getParameters())
for (const auto & param : function_node.getParameters())
{
auto * constant = param->as<ConstantNode>();
parameters.push_back(constant->getValue());
}
const auto & function_node_argument_nodes = function_node->getArguments().getNodes();
const auto & function_node_argument_nodes = function_node.getArguments().getNodes();
DataTypes argument_types;
argument_types.reserve(function_node_argument_nodes.size());
@ -655,7 +655,7 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties);
return AggregateFunctionFactory::instance().get(function_name, action, argument_types, parameters, properties);
}
}
@ -736,11 +736,11 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context)
{
if (name == "nothing" || name == "nothingUInt64" || name == "nothingNull")
return;
function_node->resolveAsAggregateFunction(resolveAggregateFunction(function_node));
function_node->resolveAsAggregateFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName()));
}
else if (function_node->isWindowFunction())
{
function_node->resolveAsWindowFunction(resolveAggregateFunction(function_node));
function_node->resolveAsWindowFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName()));
}
}
@ -793,6 +793,18 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty
return function_node;
}
void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context)
{
auto function = FunctionFactory::instance().get(function_name, context);
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name)
{
auto aggregate_function = resolveAggregateFunction(function_node, function_name);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
/** Returns:
* {_, false} - multiple sources
* {nullptr, true} - no sources (for constants)

View File

@ -112,6 +112,14 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node);
/// Wrap node into `_CAST` function
QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context);
/// Resolves function node as ordinary function with given name.
/// Arguments and parameters are taken from the node.
void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context);
/// Resolves function node as aggregate function with given name.
/// Arguments and parameters are taken from the node.
void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name);
/// Checks that node has only one source and returns it
QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node);

View File

@ -316,7 +316,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root,
}
else if (replace)
{
with_element.removeAttribute("replace");
NodePtr new_node = config->importNode(with_node, true);
config_root->replaceChild(new_node, config_node);
}

View File

@ -238,7 +238,12 @@
\
M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \
\
M(RegexpCreated, "Compiled regular expressions. Identical regular expressions compiled just once and cached forever.") \
M(RegexpWithMultipleNeedlesCreated, "Regular expressions with multiple needles (VectorScan library) compiled.") \
M(RegexpWithMultipleNeedlesGlobalCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \
M(RegexpWithMultipleNeedlesGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \
M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from a local cache.") \
M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from a local cache.") \
\
M(ContextLock, "Number of times the lock of Context was acquired or tried to acquire. This is global lock.") \
M(ContextLockWaitMicroseconds, "Context lock wait time in microseconds") \
\

View File

@ -623,7 +623,7 @@ class IColumn;
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \

View File

@ -58,6 +58,7 @@ String ClickHouseVersion::toString() const
static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory::SettingsChanges>> settings_changes_history_initializer =
{
{"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."},
{"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"},
{"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"},

View File

@ -32,7 +32,7 @@ namespace ErrorCodes
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
extern const int ILLEGAL_INDEX;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int LOGICAL_ERROR;
}
@ -286,7 +286,7 @@ std::optional<size_t> DataTypeTuple::tryGetPositionByName(const String & name) c
String DataTypeTuple::getNameByPosition(size_t i) const
{
if (i == 0 || i > names.size())
throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index of tuple element ({}) if out range ([1, {}])", i, names.size());
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index of tuple element ({}) is out range ([1, {}])", i, names.size());
return names[i - 1];
}

View File

@ -202,7 +202,7 @@ public:
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_arguments);
validateFunctionArguments(*this, arguments, mandatory_arguments);
return std::make_shared<DataTypeString>();
}

View File

@ -95,22 +95,21 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName
return res;
}
void validateArgumentType(const IFunction & func, const DataTypes & arguments,
size_t argument_index, bool (* validator_func)(const IDataType &),
const char * expected_type_description)
{
if (arguments.size() <= argument_index)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}",
func.getName());
const auto & argument = arguments[argument_index];
if (!validator_func(*argument))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}",
argument->getName(), std::to_string(argument_index), func.getName(), expected_type_description);
}
namespace
{
String withOrdinalEnding(size_t i)
{
switch (i)
{
case 0: return "1st";
case 1: return "2nd";
case 2: return "3rd";
default: return std::to_string(i) + "th";
}
}
void validateArgumentsImpl(const IFunction & func,
const ColumnsWithTypeAndName & arguments,
size_t argument_offset,
@ -120,20 +119,18 @@ void validateArgumentsImpl(const IFunction & func,
{
const auto argument_index = i + argument_offset;
if (argument_index >= arguments.size())
{
break;
}
const auto & arg = arguments[i + argument_offset];
const auto & descriptor = descriptors[i];
if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0)
throw Exception(error_code,
"Illegal type of argument #{}{} of function {}{}{}",
argument_offset + i + 1, // +1 is for human-friendly 1-based indexing
(descriptor.argument_name ? " '" + std::string(descriptor.argument_name) + "'" : String{}),
"A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}",
withOrdinalEnding(argument_offset + i),
descriptor.name,
func.getName(),
(descriptor.expected_type_description ? String(", expected ") + descriptor.expected_type_description : String{}),
(arg.type ? ", got " + arg.type->getName() : String{}));
descriptor.type_name,
arg.type ? arg.type->getName() : "<?>");
}
}
@ -141,52 +138,42 @@ void validateArgumentsImpl(const IFunction & func,
int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const ColumnPtr & column) const
{
if (type_validator_func && (data_type == nullptr || !type_validator_func(*data_type)))
if (name.empty() || type_name.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "name or type_name are not set");
if (type_validator && (data_type == nullptr || !type_validator(*data_type)))
return ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT;
if (column_validator_func && (column == nullptr || !column_validator_func(*column)))
if (column_validator && (column == nullptr || !column_validator(*column)))
return ErrorCodes::ILLEGAL_COLUMN;
return 0;
}
void validateFunctionArgumentTypes(const IFunction & func,
const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args)
void validateFunctionArguments(const IFunction & func,
const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args)
{
if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size())
{
auto join_argument_types = [](const auto & args, const String sep = ", ")
{
String result;
for (const auto & a : args)
{
using A = std::decay_t<decltype(a)>;
if constexpr (std::is_same_v<A, FunctionArgumentDescriptor>)
{
if (a.argument_name)
result += "'" + std::string(a.argument_name) + "' : ";
if (a.expected_type_description)
result += a.expected_type_description;
}
else if constexpr (std::is_same_v<A, ColumnWithTypeAndName>)
result += a.type->getName();
auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; };
result += sep;
}
if (!args.empty())
result.erase(result.end() - sep.length(), result.end());
return result;
};
String expected_args_string;
if (!mandatory_args.empty() && !optional_args.empty())
expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args));
else if (!mandatory_args.empty() && optional_args.empty())
expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments"
else if (mandatory_args.empty() && !optional_args.empty())
expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args));
else
expected_args_string = "0 arguments";
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Incorrect number of arguments for function {} provided {}{}, expected {}{} ({}{})",
func.getName(), arguments.size(), (!arguments.empty() ? " (" + join_argument_types(arguments) + ")" : String{}),
mandatory_args.size(), (!optional_args.empty() ? " to " + std::to_string(mandatory_args.size() + optional_args.size()) : ""),
join_argument_types(mandatory_args), (!optional_args.empty() ? ", [" + join_argument_types(optional_args) + "]" : ""));
"An incorrect number of arguments was specified for function '{}'. Expected {}, got {}",
func.getName(),
expected_args_string,
fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments)));
}
validateArgumentsImpl(func, arguments, 0, mandatory_args);

View File

@ -115,77 +115,58 @@ ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col);
/// column if it is nullable.
ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns);
/// Checks argument type at specified index with predicate.
/// throws if there is no argument at specified index or if predicate returns false.
void validateArgumentType(const IFunction & func, const DataTypes & arguments,
size_t argument_index, bool (* validator_func)(const IDataType &),
const char * expected_type_description);
/** Simple validator that is used in conjunction with validateFunctionArgumentTypes() to check if function arguments are as expected
*
* Also it is used to generate function description when arguments do not match expected ones.
* Any field can be null:
* `argument_name` - if not null, reported via type check errors.
* `expected_type_description` - if not null, reported via type check errors.
* `type_validator_func` - if not null, used to validate data type of function argument.
* `column_validator_func` - if not null, used to validate column of function argument.
*/
/// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided
/// arguments match the expected arguments.
struct FunctionArgumentDescriptor
{
const char * argument_name;
/// The argument name, e.g. "longitude".
/// Should not be empty.
std::string_view name;
/// A function which validates the argument data type.
/// May be nullptr.
using TypeValidator = bool (*)(const IDataType &);
TypeValidator type_validator_func;
TypeValidator type_validator;
/// A function which validates the argument column.
/// May be nullptr.
using ColumnValidator = bool (*)(const IColumn &);
ColumnValidator column_validator_func;
ColumnValidator column_validator;
const char * expected_type_description;
/// The expected argument type, e.g. "const String" or "UInt64".
/// Should not be empty.
std::string_view type_name;
/** Validate argument type and column.
*
* Returns non-zero error code if:
* Validator != nullptr && (Value == nullptr || Validator(*Value) == false)
* For:
* Validator is either `type_validator_func` or `column_validator_func`
* Value is either `data_type` or `column` respectively.
* ILLEGAL_TYPE_OF_ARGUMENT if type validation fails
*
*/
/// Validate argument type and column.
int isValid(const DataTypePtr & data_type, const ColumnPtr & column) const;
};
using FunctionArgumentDescriptors = std::vector<FunctionArgumentDescriptor>;
/** Validate that function arguments match specification.
*
* Designed to simplify argument validation for functions with variable arguments
* (e.g. depending on result type or other trait).
* First, checks that number of arguments is as expected (including optional arguments).
* Second, checks that mandatory args present and have valid type.
* Third, checks optional arguments types, skipping ones that are missing.
*
* Please note that if you have several optional arguments, like f([a, b, c]),
* only these calls are considered valid:
* f(a)
* f(a, b)
* f(a, b, c)
*
* But NOT these: f(a, c), f(b, c)
* In other words you can't omit middle optional arguments (just like in regular C++).
*
* If any mandatory arg is missing, throw an exception, with explicit description of expected arguments.
*/
void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args = {});
/// Validates that the user-provided arguments match the expected arguments.
///
/// Checks that
/// - the number of provided arguments matches the number of mandatory/optional arguments,
/// - all mandatory arguments are present and have the right type,
/// - optional arguments - if present - have the right type.
///
/// With multiple optional arguments, e.g. f([a, b, c]), provided arguments must match left-to-right. E.g. these calls are considered valid:
/// f(a)
/// f(a, b)
/// f(a, b, c)
/// but these are NOT:
/// f(a, c)
/// f(b, c)
void validateFunctionArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args = {});
/// Checks if a list of array columns have equal offsets. Return a pair of nested columns and offsets if true, otherwise throw.
std::pair<std::vector<const IColumn *>, const ColumnArray::Offset *>
checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
/** Return ColumnNullable of src, with null map as OR-ed null maps of args columns.
* Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
*/
/// Return ColumnNullable of src, with null map as OR-ed null maps of args columns.
/// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count);
struct NullPresence

View File

@ -40,7 +40,7 @@ public:
{"replacement", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeString>();
}

View File

@ -194,7 +194,7 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings(
{"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
validateFunctionArguments(func, arguments, mandatory_args, optional_args);
}
static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments)
@ -207,7 +207,7 @@ static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & fun
{"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
validateFunctionArguments(func, arguments, mandatory_args, optional_args);
}
}

View File

@ -47,7 +47,7 @@ public:
FunctionArgumentDescriptors args{
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateTime64), nullptr, "DateTime64"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeInt64>();
}

View File

@ -165,7 +165,7 @@ private:
});
}
validateFunctionArgumentTypes(*this, arguments,
validateFunctionArguments(*this, arguments,
FunctionArgumentDescriptors{
{"mode", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "encryption mode string"},
{"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "plaintext"},
@ -438,7 +438,7 @@ private:
});
}
validateFunctionArgumentTypes(*this, arguments,
validateFunctionArguments(*this, arguments,
FunctionArgumentDescriptors{
{"mode", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "decryption mode string"},
{"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "ciphertext"},

View File

@ -2020,7 +2020,7 @@ public:
DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const
{
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}};
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}};
FunctionArgumentDescriptors optional_args;
if constexpr (to_decimal)
@ -2049,7 +2049,7 @@ public:
optional_args.push_back({"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"});
}
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
if constexpr (std::is_same_v<ToDataType, DataTypeInterval>)
{
@ -2390,7 +2390,7 @@ public:
if (isDateTime64<Name, ToDataType>(arguments))
{
validateFunctionArgumentTypes(*this, arguments,
validateFunctionArguments(*this, arguments,
FunctionArgumentDescriptors{{"string", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}},
// optional
FunctionArgumentDescriptors{

View File

@ -518,66 +518,78 @@ struct Dispatcher
template <typename ScaleType>
static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr)
{
const auto & value_col_typed = checkAndGetColumn<ColumnVector<T>>(*value_col);
auto col_res = ColumnVector<T>::create();
typename ColumnVector<T>::Container & vec_res = col_res->getData();
vec_res.resize(value_col_typed.getData().size());
if (!vec_res.empty())
// Non-const value argument:
const auto * value_col_typed = checkAndGetColumn<ColumnVector<T>>(value_col);
if (value_col_typed)
{
if (scale_col == nullptr || isColumnConst(*scale_col))
{
auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
if (scale_arg == 0)
{
size_t scale = 1;
FunctionRoundingImpl<ScaleMode::Zero>::apply(value_col_typed.getData(), scale, vec_res);
}
else if (scale_arg > 0)
{
size_t scale = intExp10(scale_arg);
FunctionRoundingImpl<ScaleMode::Positive>::apply(value_col_typed.getData(), scale, vec_res);
}
else
{
size_t scale = intExp10(-scale_arg);
FunctionRoundingImpl<ScaleMode::Negative>::apply(value_col_typed.getData(), scale, vec_res);
}
}
/// Non-const scale argument:
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & value_data = value_col_typed.getData();
const auto & scale_data = scale_col_typed->getData();
const size_t rows = value_data.size();
auto col_res = ColumnVector<T>::create();
for (size_t i = 0; i < rows; ++i)
{
Int64 scale64 = scale_data[i];
validateScale(scale64);
Scale raw_scale = scale64;
typename ColumnVector<T>::Container & vec_res = col_res->getData();
vec_res.resize(value_col_typed->getData().size());
if (raw_scale == 0)
if (!vec_res.empty())
{
// Const scale argument:
if (scale_col == nullptr || isColumnConst(*scale_col))
{
auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
if (scale_arg == 0)
{
size_t scale = 1;
FunctionRoundingImpl<ScaleMode::Zero>::applyOne(value_data[i], scale, vec_res[i]);
FunctionRoundingImpl<ScaleMode::Zero>::apply(value_col_typed->getData(), scale, vec_res);
}
else if (raw_scale > 0)
else if (scale_arg > 0)
{
size_t scale = intExp10(raw_scale);
FunctionRoundingImpl<ScaleMode::Positive>::applyOne(value_data[i], scale, vec_res[i]);
size_t scale = intExp10(scale_arg);
FunctionRoundingImpl<ScaleMode::Positive>::apply(value_col_typed->getData(), scale, vec_res);
}
else
{
size_t scale = intExp10(-raw_scale);
FunctionRoundingImpl<ScaleMode::Negative>::applyOne(value_data[i], scale, vec_res[i]);
size_t scale = intExp10(-scale_arg);
FunctionRoundingImpl<ScaleMode::Negative>::apply(value_col_typed->getData(), scale, vec_res);
}
}
/// Non-const scale argument:
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & value_data = value_col_typed->getData();
const auto & scale_data = scale_col_typed->getData();
const size_t rows = value_data.size();
for (size_t i = 0; i < rows; ++i)
{
Int64 scale64 = scale_data[i];
validateScale(scale64);
Scale raw_scale = scale64;
if (raw_scale == 0)
{
size_t scale = 1;
FunctionRoundingImpl<ScaleMode::Zero>::applyOne(value_data[i], scale, vec_res[i]);
}
else if (raw_scale > 0)
{
size_t scale = intExp10(raw_scale);
FunctionRoundingImpl<ScaleMode::Positive>::applyOne(value_data[i], scale, vec_res[i]);
}
else
{
size_t scale = intExp10(-raw_scale);
FunctionRoundingImpl<ScaleMode::Negative>::applyOne(value_data[i], scale, vec_res[i]);
}
}
}
}
return col_res;
}
return col_res;
// Const value argument:
const auto * value_col_typed_const = checkAndGetColumnConst<ColumnVector<T>>(value_col);
if (value_col_typed_const)
{
auto value_col_full = value_col_typed_const->convertToFullColumn();
return apply<ScaleType>(value_col_full.get(), scale_col);
}
return nullptr;
}
};
@ -589,38 +601,52 @@ public:
template <typename ScaleType>
static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr)
{
const auto & value_col_typed = checkAndGetColumn<ColumnDecimal<T>>(*value_col);
const typename ColumnDecimal<T>::Container & vec_src = value_col_typed.getData();
auto col_res = ColumnDecimal<T>::create(vec_src.size(), value_col_typed.getScale());
auto & vec_res = col_res->getData();
if (!vec_res.empty())
// Non-const value argument:
const auto * value_col_typed = checkAndGetColumn<ColumnDecimal<T>>(value_col);
if (value_col_typed)
{
if (scale_col == nullptr || isColumnConst(*scale_col))
{
auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(value_col_typed.getData(), value_col_typed.getScale(), vec_res, scale_arg);
}
/// Non-const scale argument
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & scale = scale_col_typed->getData();
const size_t rows = vec_src.size();
const typename ColumnDecimal<T>::Container & vec_src = value_col_typed->getData();
for (size_t i = 0; i < rows; ++i)
auto col_res = ColumnDecimal<T>::create(vec_src.size(), value_col_typed->getScale());
auto & vec_res = col_res->getData();
vec_res.resize(vec_src.size());
if (!vec_res.empty())
{
/// Const scale argument:
if (scale_col == nullptr || isColumnConst(*scale_col))
{
Int64 scale64 = scale[i];
validateScale(scale64);
Scale raw_scale = scale64;
auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(vec_src, value_col_typed->getScale(), vec_res, scale_arg);
}
/// Non-const scale argument:
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & scale = scale_col_typed->getData();
const size_t rows = vec_src.size();
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::applyOne(value_col_typed.getElement(i), value_col_typed.getScale(),
reinterpret_cast<ColumnDecimal<T>::NativeT&>(col_res->getElement(i)), raw_scale);
for (size_t i = 0; i < rows; ++i)
{
Int64 scale64 = scale[i];
validateScale(scale64);
Scale raw_scale = scale64;
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(),
reinterpret_cast<ColumnDecimal<T>::NativeT&>(col_res->getElement(i)), raw_scale);
}
}
}
}
return col_res;
return col_res;
}
// Const value argument:
const auto * value_col_typed_const = checkAndGetColumnConst<ColumnDecimal<T>>(value_col);
if (value_col_typed_const)
{
auto value_col_full = value_col_typed_const->convertToFullColumn();
return apply<ScaleType>(value_col_full.get(), scale_col);
}
return nullptr;
}
};
@ -647,7 +673,7 @@ public:
FunctionArgumentDescriptors optional_args{
{"N", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), nullptr, "The number of decimal places to round to"},
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return arguments[0].type;
}
@ -671,9 +697,6 @@ public:
using ScaleTypes = std::decay_t<decltype(scaleTypes)>;
using ScaleType = typename ScaleTypes::RightType;
if (isColumnConst(*value_arg.column) && !isColumnConst(*scale_column.column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale column must be const for const data column");
res = Dispatcher<DataType, rounding_mode, tie_breaking_mode>::template apply<ScaleType>(value_arg.column.get(), scale_column.column.get());
return true;
};

View File

@ -48,7 +48,7 @@ namespace
{"json", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>());
}

View File

@ -23,7 +23,11 @@
namespace ProfileEvents
{
extern const Event RegexpCreated;
extern const Event RegexpWithMultipleNeedlesCreated;
extern const Event RegexpWithMultipleNeedlesGlobalCacheHit;
extern const Event RegexpWithMultipleNeedlesGlobalCacheMiss;
extern const Event RegexpLocalCacheHit;
extern const Event RegexpLocalCacheMiss;
}
@ -72,18 +76,28 @@ public:
Bucket & bucket = known_regexps[hasher(pattern) % CACHE_SIZE];
if (bucket.regexp == nullptr) [[unlikely]]
{
/// insert new entry
ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss);
bucket = {pattern, std::make_shared<OptimizedRegularExpression>(createRegexp<like, no_capture, case_insensitive>(pattern))};
}
else
{
if (pattern != bucket.pattern)
{
/// replace existing entry
ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss);
bucket = {pattern, std::make_shared<OptimizedRegularExpression>(createRegexp<like, no_capture, case_insensitive>(pattern))};
}
else
ProfileEvents::increment(ProfileEvents::RegexpLocalCacheHit);
}
return bucket.regexp;
}
private:
constexpr static size_t CACHE_SIZE = 100; /// collision probability
constexpr static size_t CACHE_SIZE = 1'000; /// collision probability
std::hash<String> hasher;
struct Bucket
@ -244,7 +258,7 @@ inline Regexps constructRegexps(const std::vector<String> & str_patterns, [[mayb
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Pattern '{}' failed with error '{}'", str_patterns[error->expression], String(error->message));
}
ProfileEvents::increment(ProfileEvents::RegexpCreated);
ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesCreated);
/// We allocate the scratch space only once, then copy it across multiple threads with hs_clone_scratch
/// function which is faster than allocating scratch space each time in each thread.
@ -322,9 +336,11 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector<std::string_view
{
return constructRegexps<save_indices, with_edit_distance>(str_patterns, edit_distance);
});
ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss);
bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps};
}
else
{
if (bucket.patterns != str_patterns || bucket.edit_distance != edit_distance)
{
/// replace existing entry
@ -333,8 +349,12 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector<std::string_view
{
return constructRegexps<save_indices, with_edit_distance>(str_patterns, edit_distance);
});
ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss);
bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps};
}
else
ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheHit);
}
return bucket.regexps;
}

View File

@ -32,7 +32,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
validateFunctionArguments(func, arguments, mandatory_args);
}
static constexpr auto strings_argument_position = 0uz;

View File

@ -30,7 +30,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
validateFunctionArguments(func, arguments, mandatory_args);
}
static constexpr auto strings_argument_position = 0uz;

View File

@ -30,7 +30,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
validateFunctionArguments(func, arguments, mandatory_args);
}
static constexpr auto strings_argument_position = 0uz;

View File

@ -31,7 +31,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
validateFunctionArguments(func, arguments, mandatory_args);
}
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}

View File

@ -51,6 +51,8 @@ public:
bool isVariadic() const override { return impl.isVariadic(); }
size_t getNumberOfArguments() const override { return impl.getNumberOfArguments(); }
bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); }
bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns(); }
bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants(); }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; }
@ -184,7 +186,7 @@ struct MapToNestedAdapter : public MapAdapterBase<MapToNestedAdapter<Name, retur
template <typename Name, size_t position>
struct MapToSubcolumnAdapter
{
static_assert(position <= 1);
static_assert(position <= 1, "position of Map subcolumn must be 0 or 1");
static void extractNestedTypes(DataTypes & types)
{
@ -357,7 +359,7 @@ struct NameMapValues { static constexpr auto name = "mapValues"; };
using FunctionMapValues = FunctionMapToArrayAdapter<FunctionIdentity, MapToSubcolumnAdapter<NameMapValues, 1>, NameMapValues>;
struct NameMapContains { static constexpr auto name = "mapContains"; };
using FunctionMapContains = FunctionMapToArrayAdapter<FunctionArrayIndex<HasAction, NameMapContains>, MapToSubcolumnAdapter<NameMapKeys, 0>, NameMapContains>;
using FunctionMapContains = FunctionMapToArrayAdapter<FunctionArrayIndex<HasAction, NameMapContains>, MapToSubcolumnAdapter<NameMapContains, 0>, NameMapContains>;
struct NameMapFilter { static constexpr auto name = "mapFilter"; };
using FunctionMapFilter = FunctionMapToArrayAdapter<FunctionArrayFilter, MapToNestedAdapter<NameMapFilter>, NameMapFilter>;

View File

@ -87,7 +87,7 @@ public:
{"array_1", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"array_2", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeNumber<ResultType>>();
}

View File

@ -39,7 +39,7 @@ public:
{"array", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"samples", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isUInt), isColumnConst, "const UInt*"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
// Return an array with the same nested type as the input array
const DataTypePtr & array_type = arguments[0].type;

View File

@ -31,7 +31,7 @@ public:
{"array", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"length", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isInteger), nullptr, "Integer"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].type.get());
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(array_type->getNestedType()));

View File

@ -159,7 +159,7 @@ public:
{"separator", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"},
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeString>();
}

View File

@ -203,7 +203,7 @@ private:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}};
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}};
FunctionArgumentDescriptors optional_args;
if (isDecimal(type) || isDateTime64(type))
@ -212,9 +212,9 @@ private:
if (isDateTimeOrDateTime64(type))
optional_args.push_back({"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"});
optional_args.push_back({"default_value", nullptr, nullptr, nullptr});
optional_args.push_back({"default_value", nullptr, nullptr, "any type"});
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
size_t additional_argument_index = 1;

View File

@ -38,7 +38,7 @@ public:
{"haystack", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"},
{"pattern", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "constant String"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeUInt64>();
}

View File

@ -43,7 +43,7 @@ public:
FunctionArgumentDescriptors optional_args{
{"epoch", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeUInt), isColumnConst, "const UInt*"}
};
validateFunctionArgumentTypes(*this, arguments, args, optional_args);
validateFunctionArguments(*this, arguments, args, optional_args);
return std::make_shared<DataTypeUInt64>();
}
@ -91,7 +91,7 @@ public:
FunctionArgumentDescriptors optional_args{
{"epoch", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeUInt), isColumnConst, "const UInt*"}
};
validateFunctionArgumentTypes(*this, arguments, args, optional_args);
validateFunctionArguments(*this, arguments, args, optional_args);
return std::make_shared<DataTypeUInt64>();
}

View File

@ -59,7 +59,7 @@ public:
{"pattern", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"}
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
validateFunctionArguments(func, arguments, mandatory_args);
}
static constexpr auto strings_argument_position = 0uz;

View File

@ -74,7 +74,7 @@ public:
{"haystack", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "const String or const FixedString"},
{"needle", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "const String or const FixedString"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
/// Two-dimensional array of strings, each `row` of top array represents matching groups.
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));

View File

@ -48,7 +48,7 @@ public:
{"haystack", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "const String or const FixedString"},
{"needle", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "const String or const FixedString"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}

View File

@ -54,7 +54,7 @@ public:
FunctionArgumentDescriptors args{
{"query", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
DataTypePtr string_type = std::make_shared<DataTypeString>();
if (error_handling == ErrorHandling::Null)

View File

@ -54,7 +54,7 @@ public:
{
FunctionArgumentDescriptors args{{"days", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), nullptr, "Integer"}};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<typename Traits::ReturnDataType>();
}

View File

@ -167,7 +167,7 @@ public:
FunctionArgumentDescriptors optional_args{
{"expr", nullptr, nullptr, "Arbitrary expression"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeUInt64>();
}

View File

@ -30,9 +30,9 @@ public:
{
FunctionArgumentDescriptors mandatory_args;
FunctionArgumentDescriptors optional_args{
{"expr", nullptr, nullptr, "Arbitrary Expression"}
{"expr", nullptr, nullptr, "any type"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeUUID>();
}

View File

@ -149,7 +149,7 @@ public:
FunctionArgumentDescriptors optional_args{
{"expr", nullptr, nullptr, "Arbitrary expression"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeUUID>();
}

View File

@ -38,9 +38,12 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string");
FunctionArgumentDescriptors args{
{"encoded", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}
};
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeTuple>(
DataTypes{std::make_shared<DataTypeFloat64>(), std::make_shared<DataTypeFloat64>()},

View File

@ -17,7 +17,6 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
}
namespace
@ -40,19 +39,16 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
validateArgumentType(*this, arguments, 0, isFloat, "float");
validateArgumentType(*this, arguments, 1, isFloat, "float");
if (arguments.size() == 3)
{
validateArgumentType(*this, arguments, 2, isInteger, "integer");
}
if (arguments.size() > 3)
{
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Too many arguments for function {} expected at most 3",
getName());
}
FunctionArgumentDescriptors mandatory_args{
{"longitude", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isFloat), nullptr, "Float*"},
{"latitude", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isFloat), nullptr, "Float*"}
};
FunctionArgumentDescriptors optional_args{
{"precision", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isInteger), nullptr, "(U)Int*"}
};
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeString>();
}

View File

@ -35,22 +35,25 @@ public:
size_t getNumberOfArguments() const override { return 5; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
validateArgumentType(*this, arguments, 0, isFloat, "float");
validateArgumentType(*this, arguments, 1, isFloat, "float");
validateArgumentType(*this, arguments, 2, isFloat, "float");
validateArgumentType(*this, arguments, 3, isFloat, "float");
validateArgumentType(*this, arguments, 4, isUInt8, "integer");
FunctionArgumentDescriptors args{
{"longitute_min", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isFloat), nullptr, "Float*"},
{"latitude_min", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isFloat), nullptr, "Float*"},
{"longitute_max", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isFloat), nullptr, "Float*"},
{"latitude_max", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isFloat), nullptr, "Float*"},
{"precision", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isUInt8), nullptr, "UInt8"}
};
validateFunctionArguments(*this, arguments, args);
if (!(arguments[0]->equals(*arguments[1]) &&
arguments[0]->equals(*arguments[2]) &&
arguments[0]->equals(*arguments[3])))
if (!(arguments[0].type->equals(*arguments[1].type) &&
arguments[0].type->equals(*arguments[2].type) &&
arguments[0].type->equals(*arguments[3].type)))
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type of argument of {} all coordinate arguments must have the same type, "
"instead they are:{}, {}, {}, {}.", getName(), arguments[0]->getName(),
arguments[1]->getName(), arguments[2]->getName(), arguments[3]->getName());
"instead they are:{}, {}, {}, {}.", getName(), arguments[0].type->getName(),
arguments[1].type->getName(), arguments[2].type->getName(), arguments[3].type->getName());
}
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());

View File

@ -87,7 +87,7 @@ public:
{mandatory_argument_names_year_month_day[1], static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNumber), nullptr, "Number"},
{mandatory_argument_names_year_month_day[2], static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNumber), nullptr, "Number"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
}
else
{
@ -95,7 +95,7 @@ public:
{mandatory_argument_names_year_dayofyear[0], static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNumber), nullptr, "Number"},
{mandatory_argument_names_year_dayofyear[1], static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNumber), nullptr, "Number"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
}
return std::make_shared<typename Traits::ReturnDataType>();
@ -193,7 +193,7 @@ public:
{mandatory_argument_names[0], static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNumber), nullptr, "Number"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<typename Traits::ReturnDataType>();
}
@ -357,7 +357,7 @@ public:
{optional_argument_names[0], static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
/// Optional timezone argument
std::string timezone;
@ -440,7 +440,7 @@ public:
{optional_argument_names[2], static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
if (arguments.size() >= mandatory_argument_names.size() + 1)
{
@ -572,7 +572,7 @@ public:
{optional_argument_names[0], static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
/// Optional timezone argument
std::string timezone;
@ -652,7 +652,7 @@ public:
{optional_argument_names[0], static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
/// Optional precision argument
auto precision = DEFAULT_PRECISION;

View File

@ -589,7 +589,7 @@ namespace
{"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), &isColumnConst, "const String"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
String time_zone_name = getTimeZone(arguments).getTimeZone();
DataTypePtr date_type = std::make_shared<DataTypeDateTime>(time_zone_name);

View File

@ -68,7 +68,7 @@ public:
{
{"readable_size", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
DataTypePtr return_type = std::make_shared<DataTypeUInt64>();
if constexpr (error_handling == ErrorHandling::Null)
return std::make_shared<DataTypeNullable>(return_type);

View File

@ -54,7 +54,7 @@ public:
if (arguments.size() == 3)
args.emplace_back(FunctionArgumentDescriptor{"index", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isInteger), nullptr, "Integer"});
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeString>();
}

View File

@ -201,7 +201,7 @@ public:
{"n", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isInteger), nullptr, "Integer"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeString>();
}

View File

@ -45,7 +45,7 @@ public:
{"time_series", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"period", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeUInt), nullptr, "Unsigned Integer"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat32>()));
}

View File

@ -51,7 +51,7 @@ public:
{"max_percentile", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isFloat), isColumnConst, "Number"},
{"k", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeNumber), isColumnConst, "Number"}};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat64>());
}

View File

@ -53,7 +53,7 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors args{{"time_series", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"}};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeFloat64>();
}

View File

@ -64,7 +64,7 @@ public:
FunctionArgumentDescriptors args{
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateTime), nullptr, "DateTime"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeInt64>();
}
@ -121,7 +121,7 @@ public:
FunctionArgumentDescriptors optional_args{
{"time_zone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
String timezone;
if (arguments.size() == 2)
@ -190,7 +190,7 @@ public:
FunctionArgumentDescriptors args{
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateTime64), nullptr, "DateTime64"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeInt64>();
}
@ -255,7 +255,7 @@ public:
FunctionArgumentDescriptors optional_args{
{"time_zone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
String timezone;
if (arguments.size() == 2)

View File

@ -56,7 +56,7 @@ public:
{"epoch", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeUInt), isColumnConst, "const UInt*"},
{"time_zone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
validateFunctionArgumentTypes(*this, arguments, args, optional_args);
validateFunctionArguments(*this, arguments, args, optional_args);
String timezone;
if (arguments.size() == 3)
@ -127,7 +127,7 @@ public:
{"epoch", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeUInt), isColumnConst, "const UInt*"},
{"time_zone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
validateFunctionArgumentTypes(*this, arguments, args, optional_args);
validateFunctionArguments(*this, arguments, args, optional_args);
String timezone;
if (arguments.size() == 3)

View File

@ -48,7 +48,7 @@ public:
{"n", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isInteger), nullptr, "Integer"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeString>();
}

View File

@ -100,7 +100,7 @@ public:
FunctionArgumentDescriptors args{
{"sqid", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
}

View File

@ -46,7 +46,7 @@ public:
FunctionArgumentDescriptors optional_args{
{"time", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeDateTime64>(DATETIME_SCALE);
}

View File

@ -43,7 +43,7 @@ public:
{"precision", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), &isColumnConst, "const Integer"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, {});
validateFunctionArguments(*this, arguments, mandatory_args, {});
return std::make_shared<DataTypeString>();
}

View File

@ -274,10 +274,12 @@ void DatabaseCatalog::shutdownImpl()
database->shutdown();
}
TablesMarkedAsDropped tables_marked_dropped_to_destroy;
{
std::lock_guard lock(tables_marked_dropped_mutex);
tables_marked_dropped.clear();
tables_marked_dropped.swap(tables_marked_dropped_to_destroy);
}
tables_marked_dropped_to_destroy.clear();
std::lock_guard lock(databases_mutex);
for (const auto & db : databases)

View File

@ -43,6 +43,7 @@ namespace ErrorCodes
extern const int UNKNOWN_SETTING;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS;
}
namespace
@ -170,6 +171,7 @@ struct QueryASTSettings
struct QueryTreeSettings
{
bool run_passes = true;
bool dump_tree = true;
bool dump_passes = false;
bool dump_ast = false;
Int64 passes = -1;
@ -179,6 +181,7 @@ struct QueryTreeSettings
std::unordered_map<std::string, std::reference_wrapper<bool>> boolean_settings =
{
{"run_passes", run_passes},
{"dump_tree", dump_tree},
{"dump_passes", dump_passes},
{"dump_ast", dump_ast}
};
@ -398,7 +401,11 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN QUERY TREE query");
auto settings = checkAndGetSettings<QueryTreeSettings>(ast.getSettings());
if (!settings.dump_tree && !settings.dump_ast)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'dump_tree' or 'dump_ast' must be set for EXPLAIN QUERY TREE query");
auto query_tree = buildQueryTree(ast.getExplainedQuery(), getContext());
bool need_newline = false;
if (settings.run_passes)
{
@ -410,23 +417,26 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
if (settings.dump_passes)
{
query_tree_pass_manager.dump(buf, pass_index);
if (pass_index > 0)
buf << '\n';
need_newline = true;
}
query_tree_pass_manager.run(query_tree, pass_index);
}
if (settings.dump_tree)
{
if (need_newline)
buf << "\n\n";
query_tree->dumpTree(buf);
}
else
{
query_tree->dumpTree(buf);
need_newline = true;
}
if (settings.dump_ast)
{
buf << '\n';
buf << '\n';
if (need_newline)
buf << "\n\n";
query_tree->toAST()->format(IAST::FormatSettings(buf, false));
}

View File

@ -1,157 +0,0 @@
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/NestedUtils.h>
#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Common/assert_cast.h>
namespace DB
{
namespace
{
ASTPtr transformToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
{
return std::make_shared<ASTIdentifier>(Nested::concatenateName(name_in_storage, subcolumn_name));
}
ASTPtr transformEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
{
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
return makeASTFunction("equals", ast, std::make_shared<ASTLiteral>(0u));
}
ASTPtr transformNotEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
{
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
return makeASTFunction("notEquals", ast, std::make_shared<ASTLiteral>(0u));
}
ASTPtr transformIsNotNullToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
{
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
return makeASTFunction("not", ast);
}
ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const String & subcolumn_name)
{
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
return makeASTFunction("sum", makeASTFunction("not", ast));
}
ASTPtr transformMapContainsToSubcolumn(const String & name_in_storage, const String & subcolumn_name, const ASTPtr & arg)
{
auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
return makeASTFunction("has", ast, arg);
}
const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transformToSubcolumn)>> unary_function_to_subcolumn =
{
{"length", {TypeIndex::Array, "size0", transformToSubcolumn}},
{"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}},
{"notEmpty", {TypeIndex::Array, "size0", transformNotEmptyToSubcolumn}},
{"isNull", {TypeIndex::Nullable, "null", transformToSubcolumn}},
{"isNotNull", {TypeIndex::Nullable, "null", transformIsNotNullToSubcolumn}},
{"count", {TypeIndex::Nullable, "null", transformCountNullableToSubcolumn}},
{"mapKeys", {TypeIndex::Map, "keys", transformToSubcolumn}},
{"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}},
};
const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transformMapContainsToSubcolumn)>> binary_function_to_subcolumn
{
{"mapContains", {TypeIndex::Map, "keys", transformMapContainsToSubcolumn}},
};
}
void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) const
{
const auto & arguments = function.arguments->children;
if (arguments.empty() || arguments.size() > 2)
return;
const auto * identifier = arguments[0]->as<ASTIdentifier>();
if (!identifier)
return;
const auto & columns = metadata_snapshot->getColumns();
const auto & name_in_storage = identifier->name();
if (!columns.has(name_in_storage))
return;
const auto & column_type = columns.get(name_in_storage).type;
TypeIndex column_type_id = column_type->getTypeId();
const auto & alias = function.tryGetAlias();
if (arguments.size() == 1)
{
auto it = unary_function_to_subcolumn.find(function.name);
if (it != unary_function_to_subcolumn.end())
{
const auto & [type_id, subcolumn_name, transformer] = it->second;
if (column_type_id == type_id)
{
ast = transformer(name_in_storage, subcolumn_name);
ast->setAlias(alias);
}
}
}
else
{
if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple)
{
const auto * literal = arguments[1]->as<ASTLiteral>();
if (!literal)
return;
String subcolumn_name;
auto value_type = literal->value.getType();
if (value_type == Field::Types::UInt64)
{
const auto & type_tuple = assert_cast<const DataTypeTuple &>(*column_type);
auto index = literal->value.get<UInt64>();
subcolumn_name = type_tuple.getNameByPosition(index);
}
else if (value_type == Field::Types::String)
subcolumn_name = literal->value.get<const String &>();
else
return;
ast = transformToSubcolumn(name_in_storage, subcolumn_name);
ast->setAlias(alias);
}
else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant)
{
const auto * literal = arguments[1]->as<ASTLiteral>();
if (!literal)
return;
String subcolumn_name;
auto value_type = literal->value.getType();
if (value_type != Field::Types::String)
return;
subcolumn_name = literal->value.get<const String &>();
ast = transformToSubcolumn(name_in_storage, subcolumn_name);
ast->setAlias(alias);
}
else
{
auto it = binary_function_to_subcolumn.find(function.name);
if (it != binary_function_to_subcolumn.end())
{
const auto & [type_id, subcolumn_name, transformer] = it->second;
if (column_type_id == type_id)
{
ast = transformer(name_in_storage, subcolumn_name, arguments[1]);
ast->setAlias(alias);
}
}
}
}
}
}

View File

@ -1,25 +0,0 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
#include <Storages/StorageInMemoryMetadata.h>
namespace DB
{
class ASTFunction;
/// Rewrites functions to subcolumns, if possible, to reduce amount of read data.
/// E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null'
class RewriteFunctionToSubcolumnData
{
public:
using TypeToVisit = ASTFunction;
void visit(ASTFunction & function, ASTPtr & ast) const;
StorageMetadataPtr metadata_snapshot;
};
using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher<RewriteFunctionToSubcolumnData>;
using RewriteFunctionToSubcolumnVisitor = InDepthNodeVisitor<RewriteFunctionToSubcolumnMatcher, true>;
}

View File

@ -17,7 +17,6 @@
#include <Interpreters/RewriteCountVariantsVisitor.h>
#include <Interpreters/ConvertStringsToEnumVisitor.h>
#include <Interpreters/ConvertFunctionOrLikeVisitor.h>
#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/GatherFunctionQuantileVisitor.h>
@ -564,12 +563,6 @@ void transformIfStringsIntoEnum(ASTPtr & query)
ConvertStringsToEnumVisitor(convert_data).visit(query);
}
void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot)
{
RewriteFunctionToSubcolumnVisitor::Data data{metadata_snapshot};
RewriteFunctionToSubcolumnVisitor(data).visit(query);
}
void optimizeOrLikeChain(ASTPtr & query)
{
ConvertFunctionOrLikeVisitor::Data data = {};
@ -634,9 +627,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
if (!select_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts.");
if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns())
optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata);
/// Move arithmetic operations out of aggregation functions
if (settings.optimize_arithmetic_operations_in_aggregate_functions)
optimizeAggregationFunctions(query);

View File

@ -165,6 +165,8 @@ public:
/// Returns true if the storage supports reading of subcolumns of complex types.
virtual bool supportsSubcolumns() const { return false; }
/// Returns true if storage supports optimizations of functions by reading subcolumns.
virtual bool supportsOptimizationToSubcolumns() const { return supportsSubcolumns(); }
/// Returns true if the storage supports transactions for SELECT, INSERT and ALTER queries.
/// Storage may throw an exception later if some query kind is not fully supported.

View File

@ -37,7 +37,10 @@ public:
QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override;
bool isRemote() const override { return true; }
bool isRemote() const final { return true; }
bool supportsSubcolumns() const override { return true; }
bool supportsOptimizationToSubcolumns() const override { return false; }
bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; }
protected:
virtual void updateBeforeRead(const ContextPtr &) {}

View File

@ -24,12 +24,6 @@ public:
std::string getName() const override;
bool supportsSubcolumns() const override { return true; }
bool supportsDynamicSubcolumns() const override { return true; }
bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; }
RemoteQueryExecutor::Extension getTaskIteratorExtension(
const ActionsDAG::Node * predicate, const ContextPtr & context) const override;

View File

@ -79,6 +79,7 @@ private:
void drop() override;
bool supportsSubsetOfColumns(const ContextPtr & context_) const;
bool supportsSubcolumns() const override { return true; }
bool supportsOptimizationToSubcolumns() const override { return false; }
bool supportsDynamicSubcolumns() const override { return true; }
std::shared_ptr<FileIterator> createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate);

View File

@ -1,15 +1,14 @@
#include <optional>
#include <numeric>
#include <Storages/Statistics/Statistics.h>
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
#include <Storages/Statistics/TDigestStatistics.h>
#include <Storages/Statistics/UniqStatistics.h>
#include <Storages/Statistics/StatisticsTDigest.h>
#include <Storages/Statistics/StatisticsUniq.h>
#include <Storages/StatisticsDescription.h>
#include <Storages/ColumnsDescription.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
namespace DB
{
@ -20,32 +19,57 @@ namespace ErrorCodes
extern const int INCORRECT_QUERY;
}
/// Version / bitmask of statistics / data of statistics /
enum StatisticsFileVersion : UInt16
{
V0 = 0,
};
IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) {}
IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
: stat(stat_)
{
}
ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_)
: stats_desc(stats_desc_), rows(0)
: stats_desc(stats_desc_)
{
}
void ColumnStatistics::update(const ColumnPtr & column)
{
rows += column->size();
for (const auto & iter : stats)
{
iter.second->update(column);
}
for (const auto & stat : stats)
stat.second->update(column);
}
UInt64 IStatistics::estimateCardinality() const
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics");
}
Float64 IStatistics::estimateEqual(Float64 /*val*/) const
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics");
}
Float64 IStatistics::estimateLess(Float64 /*val*/) const
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics");
}
/// -------------------------------------
/// Implementation of the estimation:
/// Note: Each statistics object supports certain types predicates natively, e.g.
/// - TDigest: '< X' (less-than predicates)
/// - Count-min sketches: '= X' (equal predicates)
/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality)
/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way.
/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics
/// object that supports it natively.
Float64 ColumnStatistics::estimateLess(Float64 val) const
{
if (stats.contains(StatisticsType::TDigest))
return std::static_pointer_cast<TDigestStatistics>(stats.at(StatisticsType::TDigest))->estimateLess(val);
return stats.at(StatisticsType::TDigest)->estimateLess(val);
return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
}
@ -58,14 +82,9 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const
{
if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
{
auto uniq_static = std::static_pointer_cast<UniqStatistics>(stats.at(StatisticsType::Uniq));
/// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows)
/// for every bucket.
if (uniq_static->getCardinality() < 2048)
{
auto tdigest_static = std::static_pointer_cast<TDigestStatistics>(stats.at(StatisticsType::TDigest));
return tdigest_static->estimateEqual(val);
}
/// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket.
if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048)
return stats.at(StatisticsType::TDigest)->estimateEqual(val);
}
if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold)
return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
@ -73,17 +92,22 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const
return rows * ConditionSelectivityEstimator::default_good_cond_factor;
}
/// -------------------------------------
void ColumnStatistics::serialize(WriteBuffer & buf)
{
writeIntBinary(V0, buf);
UInt64 stat_types_mask = 0;
for (const auto & [type, _]: stats)
stat_types_mask |= 1 << UInt8(type);
writeIntBinary(stat_types_mask, buf);
/// We write some basic statistics
/// as the column row count is always useful, save it in any case
writeIntBinary(rows, buf);
/// We write complex statistics
for (const auto & [type, stat_ptr]: stats)
/// write the actual statistics object
for (const auto & [type, stat_ptr] : stats)
stat_ptr->serialize(buf);
}
@ -96,7 +120,9 @@ void ColumnStatistics::deserialize(ReadBuffer &buf)
UInt64 stat_types_mask = 0;
readIntBinary(stat_types_mask, buf);
readIntBinary(rows, buf);
for (auto it = stats.begin(); it != stats.end();)
{
if (!(stat_types_mask & 1 << UInt8(it->first)))
@ -136,15 +162,15 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va
{
if (!validators.emplace(stats_type, std::move(validator)).second)
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticsFactory: the statistics validator type {} is not unique", stats_type);
}
MergeTreeStatisticsFactory::MergeTreeStatisticsFactory()
{
registerCreator(StatisticsType::TDigest, TDigestCreator);
registerCreator(StatisticsType::Uniq, UniqCreator);
registerValidator(StatisticsType::TDigest, TDigestValidator);
registerCreator(StatisticsType::TDigest, TDigestCreator);
registerValidator(StatisticsType::Uniq, UniqValidator);
registerCreator(StatisticsType::Uniq, UniqCreator);
}
MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance()
@ -159,9 +185,7 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st
{
auto it = validators.find(type);
if (it == validators.end())
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown Statistic type '{}'", type);
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistic type '{}'", type);
it->second(desc, data_type);
}
}
@ -173,10 +197,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri
{
auto it = creators.find(type);
if (it == creators.end())
{
throw Exception(ErrorCodes::INCORRECT_QUERY,
"Unknown Statistic type '{}'. Available types: tdigest, uniq", type);
}
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type);
auto stat_ptr = (it->second)(desc, stats.data_type);
column_stat->stats[type] = stat_ptr;
}

View File

@ -1,19 +1,15 @@
#pragma once
#include <memory>
#include <boost/core/noncopyable.hpp>
#include <Core/Block.h>
#include <Common/logger_useful.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <Storages/StatisticsDescription.h>
#include <boost/core/noncopyable.hpp>
namespace DB
{
/// this is for user-defined statistic.
constexpr auto STATS_FILE_PREFIX = "statistics_";
constexpr auto STATS_FILE_SUFFIX = ".stats";
@ -25,14 +21,21 @@ class IStatistics
{
public:
explicit IStatistics(const SingleStatisticsDescription & stat_);
virtual ~IStatistics() = default;
virtual void serialize(WriteBuffer & buf) = 0;
virtual void update(const ColumnPtr & column) = 0;
virtual void serialize(WriteBuffer & buf) = 0;
virtual void deserialize(ReadBuffer & buf) = 0;
virtual void update(const ColumnPtr & column) = 0;
/// Estimate the cardinality of the column.
/// Throws if the statistics object is not able to do a meaningful estimation.
virtual UInt64 estimateCardinality() const;
/// Per-value estimations.
/// Throws if the statistics object is not able to do a meaningful estimation.
virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column
virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column
protected:
SingleStatisticsDescription stat;
@ -43,11 +46,12 @@ using StatisticsPtr = std::shared_ptr<IStatistics>;
class ColumnStatistics
{
public:
explicit ColumnStatistics(const ColumnStatisticsDescription & stats_);
explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_);
void serialize(WriteBuffer & buf);
void deserialize(ReadBuffer & buf);
String getFileName() const;
String getFileName() const;
const String & columnName() const;
UInt64 rowCount() const;
@ -55,17 +59,14 @@ public:
void update(const ColumnPtr & column);
Float64 estimateLess(Float64 val) const;
Float64 estimateGreater(Float64 val) const;
Float64 estimateEqual(Float64 val) const;
private:
friend class MergeTreeStatisticsFactory;
ColumnStatisticsDescription stats_desc;
std::map<StatisticsType, StatisticsPtr> stats;
UInt64 rows; /// the number of rows of the column
UInt64 rows = 0; /// the number of rows in the column
};
class ColumnsDescription;
@ -79,25 +80,23 @@ public:
void validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const;
using Validator = std::function<void(const SingleStatisticsDescription & stats, DataTypePtr data_type)>;
using Creator = std::function<StatisticsPtr(const SingleStatisticsDescription & stats, DataTypePtr data_type)>;
using Validator = std::function<void(const SingleStatisticsDescription & stats, DataTypePtr data_type)>;
ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const;
ColumnsStatistics getMany(const ColumnsDescription & columns) const;
void registerCreator(StatisticsType type, Creator creator);
void registerValidator(StatisticsType type, Validator validator);
void registerCreator(StatisticsType type, Creator creator);
protected:
MergeTreeStatisticsFactory();
private:
using Creators = std::unordered_map<StatisticsType, Creator>;
using Validators = std::unordered_map<StatisticsType, Validator>;
Creators creators;
using Creators = std::unordered_map<StatisticsType, Creator>;
Validators validators;
Creators creators;
};
}

View File

@ -0,0 +1,60 @@
#include <Storages/Statistics/StatisticsTDigest.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_STATISTICS;
}
StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
: IStatistics(stat_)
{
}
void StatisticsTDigest::update(const ColumnPtr & column)
{
size_t rows = column->size();
for (size_t row = 0; row < rows; ++row)
{
/// TODO: support more types.
Float64 value = column->getFloat64(row);
t_digest.add(value, 1);
}
}
void StatisticsTDigest::serialize(WriteBuffer & buf)
{
t_digest.serialize(buf);
}
void StatisticsTDigest::deserialize(ReadBuffer & buf)
{
t_digest.deserialize(buf);
}
Float64 StatisticsTDigest::estimateLess(Float64 val) const
{
return t_digest.getCountLessThan(val);
}
Float64 StatisticsTDigest::estimateEqual(Float64 val) const
{
return t_digest.getCountEqual(val);
}
void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName());
}
StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
{
return std::make_shared<StatisticsTDigest>(stat);
}
}

View File

@ -6,27 +6,24 @@
namespace DB
{
/// TDigestStatistic is a kind of histogram.
class TDigestStatistics : public IStatistics
class StatisticsTDigest : public IStatistics
{
public:
explicit TDigestStatistics(const SingleStatisticsDescription & stat_);
Float64 estimateLess(Float64 val) const;
Float64 estimateEqual(Float64 val) const;
void serialize(WriteBuffer & buf) override;
void deserialize(ReadBuffer & buf) override;
explicit StatisticsTDigest(const SingleStatisticsDescription & stat_);
void update(const ColumnPtr & column) override;
void serialize(WriteBuffer & buf) override;
void deserialize(ReadBuffer & buf) override;
Float64 estimateLess(Float64 val) const override;
Float64 estimateEqual(Float64 val) const override;
private:
QuantileTDigest<Float64> data;
QuantileTDigest<Float64> t_digest;
};
StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
}

View File

@ -1,4 +1,4 @@
#include <Storages/Statistics/UniqStatistics.h>
#include <Storages/Statistics/StatisticsUniq.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
@ -10,7 +10,7 @@ namespace ErrorCodes
extern const int ILLEGAL_STATISTICS;
}
UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type)
StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type)
: IStatistics(stat_)
{
arena = std::make_unique<Arena>();
@ -20,29 +20,12 @@ UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const
collector->create(data);
}
UniqStatistics::~UniqStatistics()
StatisticsUniq::~StatisticsUniq()
{
collector->destroy(data);
}
UInt64 UniqStatistics::getCardinality()
{
auto column = DataTypeUInt64().createColumn();
collector->insertResultInto(data, *column, nullptr);
return column->getUInt(0);
}
void UniqStatistics::serialize(WriteBuffer & buf)
{
collector->serialize(data, buf);
}
void UniqStatistics::deserialize(ReadBuffer & buf)
{
collector->deserialize(data, buf);
}
void UniqStatistics::update(const ColumnPtr & column)
void StatisticsUniq::update(const ColumnPtr & column)
{
/// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly.
/// Here we intend to avoid crash in CI.
@ -51,16 +34,33 @@ void UniqStatistics::update(const ColumnPtr & column)
collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr);
}
void StatisticsUniq::serialize(WriteBuffer & buf)
{
collector->serialize(data, buf);
}
void StatisticsUniq::deserialize(ReadBuffer & buf)
{
collector->deserialize(data, buf);
}
UInt64 StatisticsUniq::estimateCardinality() const
{
auto column = DataTypeUInt64().createColumn();
collector->insertResultInto(data, *column, nullptr);
return column->getUInt(0);
}
void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' does not support type {}", data_type->getName());
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName());
}
StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
{
return std::make_shared<UniqStatistics>(stat, data_type);
return std::make_shared<StatisticsUniq>(stat, data_type);
}
}

View File

@ -7,30 +7,27 @@
namespace DB
{
class UniqStatistics : public IStatistics
class StatisticsUniq : public IStatistics
{
public:
UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type);
~UniqStatistics() override;
UInt64 getCardinality();
void serialize(WriteBuffer & buf) override;
void deserialize(ReadBuffer & buf) override;
StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type);
~StatisticsUniq() override;
void update(const ColumnPtr & column) override;
private:
void serialize(WriteBuffer & buf) override;
void deserialize(ReadBuffer & buf) override;
UInt64 estimateCardinality() const override;
private:
std::unique_ptr<Arena> arena;
AggregateFunctionPtr collector;
AggregateDataPtr data;
};
StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
}

View File

@ -1,60 +0,0 @@
#include <Storages/Statistics/TDigestStatistics.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_STATISTICS;
}
TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_):
IStatistics(stat_)
{
}
Float64 TDigestStatistics::estimateLess(Float64 val) const
{
return data.getCountLessThan(val);
}
Float64 TDigestStatistics::estimateEqual(Float64 val) const
{
return data.getCountEqual(val);
}
void TDigestStatistics::serialize(WriteBuffer & buf)
{
data.serialize(buf);
}
void TDigestStatistics::deserialize(ReadBuffer & buf)
{
data.deserialize(buf);
}
void TDigestStatistics::update(const ColumnPtr & column)
{
size_t size = column->size();
for (size_t i = 0; i < size; ++i)
{
/// TODO: support more types.
Float64 value = column->getFloat64(i);
data.add(value, 1);
}
}
StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
{
return std::make_shared<TDigestStatistics>(stat);
}
void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' does not support type {}", data_type->getName());
}
}

View File

@ -1,6 +1,6 @@
#include <gtest/gtest.h>
#include <Storages/Statistics/TDigestStatistics.h>
#include <Storages/Statistics/StatisticsTDigest.h>
TEST(Statistics, TDigestLessThan)
{

View File

@ -89,6 +89,7 @@ public:
bool supportsSubsetOfColumns(const ContextPtr & context) const;
bool supportsSubcolumns() const override { return true; }
bool supportsOptimizationToSubcolumns() const override { return false; }
bool supportsDynamicSubcolumns() const override { return true; }

View File

@ -27,15 +27,8 @@ public:
const ConstraintsDescription & constraints_);
std::string getName() const override { return "FileCluster"; }
RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
bool supportsSubcolumns() const override { return true; }
bool supportsDynamicSubcolumns() const override { return true; }
bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; }
private:
void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override;

View File

@ -296,6 +296,7 @@ public:
}
bool supportsSubcolumns() const override { return true; }
bool supportsOptimizationToSubcolumns() const override { return false; }
bool supportsDynamicSubcolumns() const override { return true; }

View File

@ -30,15 +30,8 @@ public:
const StorageURL::Configuration & configuration_);
std::string getName() const override { return "URLCluster"; }
RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
bool supportsSubcolumns() const override { return true; }
bool supportsDynamicSubcolumns() const override { return true; }
bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; }
private:
void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override;

View File

@ -0,0 +1,3 @@
<clickhouse>
<max_thread_pool_size>10000</max_thread_pool_size>
</clickhouse>

Some files were not shown because too many files have changed in this diff Show More