Merge remote-tracking branch 'origin/master' into mv4

This commit is contained in:
Michael Kolupaev 2024-07-04 08:28:59 +00:00
commit ba5e50cedc
292 changed files with 7460 additions and 5670 deletions

1
.gitattributes vendored
View File

@ -2,3 +2,4 @@ contrib/* linguist-vendored
*.h linguist-language=C++
tests/queries/0_stateless/data_json/* binary
tests/queries/0_stateless/*.reference -crlf
src/Core/SettingsChangesHistory.cpp merge=union

View File

@ -13,5 +13,4 @@ rules:
level: warning
comments:
min-spaces-from-content: 1
document-start:
present: false
document-start: disable

View File

@ -34,7 +34,7 @@
* Add `_time` virtual column to file-like storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)).
* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)).
* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)).
* Add `http_response_headers` configuration to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)).
* Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)). This is useful for testing.
* Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)).
* Added a setting `output_format_pretty_display_footer_column_names` which when enabled displays column names at the end of the table for long tables (50 rows by default), with the threshold value for minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)).

View File

@ -1,32 +1,3 @@
// Based on https://github.com/amdn/itoa and combined with our optimizations
//
//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-//
//
// The MIT License (MIT)
// Copyright (c) 2016 Arturo Martin-de-Nicolas
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//===----------------------------------------------------------------------===//
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <base/defines.h>
#include <base/extended_types.h>
@ -34,99 +5,15 @@
namespace
{
/// Returns 10 raised to the power `x`, computed in type T.
/// Every intermediate product is converted back to T, exactly as one multiplication by 10 per step.
template <typename T>
ALWAYS_INLINE inline constexpr T pow10(size_t x)
{
    T result = 1;
    for (size_t step = 0; step < x; ++step)
        result = static_cast<T>(10 * result);
    return result;
}
// Division by a power of 10 is implemented using a multiplicative inverse.
// This strength reduction is also done by optimizing compilers, but
// presently the fastest results are produced by using the values
// for the multiplication and the shift as given by the algorithm
// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
//
// http://www.agner.org/optimize/optimizing_assembly.pdf
//
// "Integer division by a constant (all processors)
// A floating point number can be divided by a constant by multiplying
// with the reciprocal. If we want to do the same with integers, we have
// to scale the reciprocal by 2^n and then shift the product to the right
// by n. There are various algorithms for finding a suitable value of n
// and compensating for rounding errors. The algorithm described below
// was invented by Terje Mathisen, Norway, and not published elsewhere."
/// Compile-time description of one "divide by a constant" recipe.
/// The division is carried out as:
///   1. add 1 to the dividend when `add` is true;
///   2. multiply by `multiplier`;
///   3. shift the product right by `shift` bits.
template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
struct Division
{
    static constexpr bool add = add_;
    static constexpr UInt multiplier = multiplier_;
    static constexpr unsigned shift = shift_;
};
/// Picks a type from a list by requested byte count.
/// The first parameter is the number of bytes; the remaining parameters are candidate types
/// listed for 1, 2, 4, ... bytes. Each step halves N and drops the first candidate.
/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t>::Result is uint32_t.
template <size_t N, typename T, typename... Ts>
struct SelectType : SelectType<N / 2, Ts...>
{
};

/// End of the search: N has been halved down to 1, so the current head of the list is the answer.
template <typename T, typename... Ts>
struct SelectType<1, T, Ts...>
{
    using Result = T;
};
/// Division recipe for dividing by 10^N, where N is the size of the operand type in bytes.
/// SelectType picks the entry matching N: the entries are listed for N = 1, 2, 4 and 8.
template <size_t N>
using DivisionBy10PowN = typename SelectType<
N,
Division<uint8_t, false, 205U, 11>, /// N = 1: divide by 10
Division<uint16_t, true, 41943U, 22>, /// N = 2: divide by 100 (this recipe adds 1 to the dividend first)
Division<uint32_t, false, 3518437209U, 45>, /// N = 4: divide by 10000
Division<uint64_t, false, 12379400392853802749ULL, 90> /// N = 8: divide by 100000000
>::Result;
/// Unsigned integer type chosen by byte count N:
/// 1 -> uint8_t, 2 -> uint16_t, 4 -> uint32_t, 8 -> uint64_t, 16 -> __uint128_t.
template <size_t N>
using UnsignedOfSize = typename SelectType<N, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>::Result;
/// Holds the result of dividing an unsigned N-byte variable by 10^N:
/// the quotient keeps the full N-byte width, the remainder is stored in the half-width type.
template <size_t N>
struct QuotientAndRemainder
{
UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
};
/// Splits `value` into the quotient and remainder of a division by 10^N.
/// The quotient is obtained with the add / multiply / shift recipe from DivisionBy10PowN<N>;
/// the remainder is then recovered as value - quotient * 10^N.
template <size_t N>
QuotientAndRemainder<N> inline split(UnsignedOfSize<N> value)
{
    using Recipe = DivisionBy10PowN<N>;
    using Full = UnsignedOfSize<N>;
    using Half = UnsignedOfSize<N / 2>;

    /// The dividend is converted to the 2*N-byte type before the optional +1 and the multiplication.
    const auto biased = UnsignedOfSize<2 * N>(value) + Recipe::add;
    const Full q = (Recipe::multiplier * biased) >> Recipe::shift;

    const Half r = static_cast<Half>(value - q * pow10<Half>(N));
    return {q, r};
}
ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value)
ALWAYS_INLINE inline char * outOneDigit(char * p, uint8_t value)
{
*p = '0' + value;
++p;
return p;
return p + 1;
}
// Using a lookup table to convert binary numbers from 0 to 99
// into ascii characters as described by Andrei Alexandrescu in
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
const char digits[201] = "00010203040506070809"
"10111213141516171819"
"20212223242526272829"
@ -137,7 +24,6 @@ const char digits[201] = "00010203040506070809"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
{
memcpy(p, &digits[value * 2], 2);
@ -145,153 +31,260 @@ ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
return p;
}
namespace convert
namespace jeaiii
{
template <typename UInt, size_t N = sizeof(UInt)>
char * head(char * p, UInt u);
template <typename UInt, size_t N = sizeof(UInt)>
char * tail(char * p, UInt u);
/*
MIT License
//===----------------------------------------------------------===//
// head: find most significant digit, skip leading zeros
//===----------------------------------------------------------===//
Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa
// "x" contains quotient and remainder after division by 10^N
// quotient is less than 10^N
template <size_t N>
ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder<N> x)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
struct pair
{
p = head(p, UnsignedOfSize<N / 2>(x.quotient));
p = tail(p, x.remainder);
return p;
char dd[2];
constexpr pair(char c) : dd{c, '\0'} { } /// NOLINT(google-explicit-constructor)
constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } /// NOLINT(google-explicit-constructor)
};
/// Lookup tables of two-character `pair` entries indexed by a value in [0, 99].
/// Integer initializers go through pair(int) and produce both decimal digits (tens, then ones);
/// character initializers go through pair(char) and produce that character followed by '\0'.
constexpr struct
{
/// dd: every entry is two digits, so 0..9 are stored with a leading zero ("00".."09").
pair dd[100]{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, //
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
};
/// fd: same as dd for 10..99, but 0..9 are stored as a single digit character followed by '\0'
/// (no leading zero).
pair fd[100]{
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
};
} digits;
/// Bit masks selecting the low 24, 32 and 57 bits of a 64-bit value.
constexpr UInt64 mask24 = (UInt64(1) << 24) - 1;
constexpr UInt64 mask32 = (UInt64(1) << 32) - 1;
constexpr UInt64 mask57 = (UInt64(1) << 57) - 1;
/// Compile-time type selection: `type` is IfTrue when Pick is true, IfFalse otherwise.
template <bool Pick, class IfTrue, class IfFalse>
struct _cond
{
    using type = IfTrue;
};

/// The false case selects the second alternative.
template <class IfTrue, class IfFalse>
struct _cond<false, IfTrue, IfFalse>
{
    using type = IfFalse;
};

/// Shorthand for the selected type: cond<B, T, F> is T if B is true, otherwise F.
template <bool B, class T, class F>
using cond = typename _cond<B, T, F>::type;
template <class T>
inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i)
{
constexpr auto q = sizeof(T);
using U = cond<q == 1, char8_t, cond<q <= sizeof(UInt16), UInt16, cond<q <= sizeof(UInt32), UInt32, UInt64>>>;
// convert bool to int before test with unary + to silence warning if T happens to be bool
U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i);
if (n < U(1e2))
{
/// This is changed from the original jeaiii implementation
/// For small numbers the extra branch to call outOneDigit() is worth it as it saves some instructions
/// and a memory access (no need to read digits.fd[n])
/// This is not true for pure random numbers, but that's not the common use case of a database
/// Original jeaiii code
// *reinterpret_cast<pair *>(b) = digits.fd[n];
// return n < 10 ? b + 1 : b + 2;
return n < 10 ? outOneDigit(b, n) : outTwoDigits(b, n);
}
if (n < UInt32(1e6))
{
if (sizeof(U) == 1 || n < U(1e4))
{
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
if constexpr (sizeof(U) == 1)
b -= 1;
else
b -= n < U(1e3);
auto f2 = (f0 & mask24) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
return b + 4;
}
auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
if constexpr (sizeof(U) == 2)
b -= 1;
else
b -= n < U(1e5);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
return b + 6;
}
if (sizeof(U) == 4 || n < UInt64(1ull << 32ull))
{
if (n < U(1e8))
{
auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
b -= n < U(1e7);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
return b + 8;
}
auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * n;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
b -= n < UInt32(1e9);
auto f2 = (f0 & mask57) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
auto f4 = (f2 & mask57) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
auto f6 = (f4 & mask57) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
auto f8 = (f6 & mask57) * 100;
*reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
return b + 10;
}
// "u" is less than 10^2*N
template <typename UInt, size_t N>
ALWAYS_INLINE inline char * head(char * p, UInt u)
// if we get here U must be UInt64 but some compilers don't know that, so reassign n to a UInt64 to avoid warnings
UInt32 z = n % UInt32(1e8);
UInt64 u = n / UInt32(1e8);
if (u < UInt32(1e2))
{
return u < pow10<UnsignedOfSize<N>>(N) ? head(p, UnsignedOfSize<N / 2>(u)) : head<N>(p, split<N>(u));
// u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number)
*reinterpret_cast<pair *>(b) = digits.dd[u];
b += 2;
}
// recursion base case, selected when "u" is one byte
template <>
ALWAYS_INLINE inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
else if (u < UInt32(1e6))
{
return u < 10 ? outDigit(p, u) : outTwoDigits(p, u);
if (u < UInt32(1e4))
{
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
b -= u < UInt32(1e3);
auto f2 = (f0 & mask24) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
b += 4;
}
//===----------------------------------------------------------===//
// tail: produce all digits including leading zeros
//===----------------------------------------------------------===//
// recursive step, "u" is less than 10^2*N
template <typename UInt, size_t N>
ALWAYS_INLINE inline char * tail(char * p, UInt u)
{
QuotientAndRemainder<N> x = split<N>(u);
p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
p = tail(p, x.remainder);
return p;
}
// recursion base case, selected when "u" is one byte
template <>
ALWAYS_INLINE inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
return outTwoDigits(p, u);
}
//===----------------------------------------------------------===//
// large values are >= 10^2*N
// where x contains quotient and remainder after division by 10^N
//===----------------------------------------------------------===//
template <size_t N>
ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder<N> x)
{
QuotientAndRemainder<N> y = split<N>(x.quotient);
p = head(p, UnsignedOfSize<N / 2>(y.quotient));
p = tail(p, y.remainder);
p = tail(p, x.remainder);
return p;
}
//===----------------------------------------------------------===//
// handle values of "u" that might be >= 10^2*N
// where N is the size of "u" in bytes
//===----------------------------------------------------------===//
template <typename UInt, size_t N = sizeof(UInt)>
ALWAYS_INLINE inline char * uitoa(char * p, UInt u)
{
if (u < pow10<UnsignedOfSize<N>>(N))
return head(p, UnsignedOfSize<N / 2>(u));
QuotientAndRemainder<N> x = split<N>(u);
return u < pow10<UnsignedOfSize<N>>(2 * N) ? head<N>(p, x) : large<N>(p, x);
}
// selected when "u" is one byte
template <>
ALWAYS_INLINE inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
if (u < 10)
return outDigit(p, u);
else if (u < 100)
return outTwoDigits(p, u);
else
{
p = outDigit(p, u / 100);
p = outTwoDigits(p, u % 100);
return p;
auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
b -= u < UInt32(1e5);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
b += 6;
}
}
//===----------------------------------------------------------===//
// handle unsigned and signed integral operands
//===----------------------------------------------------------===//
// itoa: handle unsigned integral operands (selected by SFINAE)
template <typename U>
requires(!std::is_signed_v<U> && std::is_integral_v<U>)
ALWAYS_INLINE inline char * itoa(U u, char * p)
else if (u < UInt32(1e8))
{
return convert::uitoa(p, u);
auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
b -= u < UInt32(1e7);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
b += 8;
}
// itoa: handle signed integral operands (selected by SFINAE)
template <typename I, size_t N = sizeof(I)>
requires(std::is_signed_v<I> && std::is_integral_v<I>)
ALWAYS_INLINE inline char * itoa(I i, char * p)
else if (u < UInt64(1ull << 32ull))
{
// Need "mask" to be filled with a copy of the sign bit.
// If "i" is a negative value, then the result of "operator >>"
// is implementation-defined, though usually it is an arithmetic
// right shift that replicates the sign bit.
// Use a conditional expression to be portable,
// a good optimizing compiler generates an arithmetic right shift
// and avoids the conditional branch.
UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
// Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
// Cannot use std::abs() because the result is undefined
// in 2's complement systems for the most-negative value.
// Want to avoid conditional branch for performance reasons since
// CPU branch prediction will be ineffective when negative values
// occur randomly.
// Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
// Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
// This yields the absolute value with the desired type without
// using a conditional branch and without invoking undefined or
// implementation defined behavior:
UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
// Unconditionally store a minus sign when producing digits
// in a forward direction and increment the pointer only if
// the value is in fact negative.
// This avoids a conditional branch and is safe because we will
// always produce at least one digit and it will overwrite the
// minus sign when the value is not negative.
*p = '-';
p += (mask & 1);
p = convert::uitoa(p, u);
return p;
auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
b -= u < UInt32(1e9);
auto f2 = (f0 & mask57) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
auto f4 = (f2 & mask57) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
auto f6 = (f4 & mask57) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
auto f8 = (f6 & mask57) * 100;
*reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
b += 10;
}
else
{
UInt32 y = u % UInt32(1e8);
u /= UInt32(1e8);
// u is 2, 3, or 4 digits (if u < 10 it would have been handled above)
if (u < UInt32(1e2))
{
*reinterpret_cast<pair *>(b) = digits.dd[u];
b += 2;
}
else
{
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
b -= u < UInt32(1e3);
auto f2 = (f0 & mask24) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
b += 4;
}
// do 8 digits
auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1;
*reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
b += 8;
}
// do 8 digits
auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1;
*reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
return b + 8;
}
}
@ -303,7 +296,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
{
/// If the highest 64-bit item is empty, we can print just the lowest item as u64
if (_x.items[UInt128::_impl::little(1)] == 0)
return convert::itoa(_x.items[UInt128::_impl::little(0)], p);
return jeaiii::to_text_from_integer(p, _x.items[UInt128::_impl::little(0)]);
/// Doing operations using __int128 is faster and we already rely on this feature
using T = unsigned __int128;
@ -334,7 +327,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
current_block += max_multiple_of_hundred_blocks;
}
char * highest_part_print = convert::itoa(uint64_t(x), p);
char * highest_part_print = jeaiii::to_text_from_integer(p, uint64_t(x));
for (int i = 0; i < current_block; i++)
{
outTwoDigits(highest_part_print, two_values[current_block - 1 - i]);
@ -450,12 +443,12 @@ ALWAYS_INLINE inline char * writeSIntText(T x, char * pos)
char * itoa(UInt8 i, char * p)
{
return convert::itoa(uint8_t(i), p);
return jeaiii::to_text_from_integer(p, uint8_t(i));
}
char * itoa(Int8 i, char * p)
{
return convert::itoa(int8_t(i), p);
return jeaiii::to_text_from_integer(p, int8_t(i));
}
char * itoa(UInt128 i, char * p)
@ -481,7 +474,7 @@ char * itoa(Int256 i, char * p)
#define DEFAULT_ITOA(T) \
char * itoa(T i, char * p) \
{ \
return convert::itoa(i, p); \
return jeaiii::to_text_from_integer(p, i); \
}
#define FOR_MISSING_INTEGER_TYPES(M) \

View File

@ -235,8 +235,6 @@ namespace Net
/// Note that simply closing a socket is not sufficient
/// to be able to re-use it again.
Poco::Timespan getMaxTimeout();
private:
SecureSocketImpl(const SecureSocketImpl &);
SecureSocketImpl & operator=(const SecureSocketImpl &);
@ -250,6 +248,9 @@ namespace Net
Session::Ptr _pSession;
friend class SecureStreamSocketImpl;
Poco::Timespan getMaxTimeoutOrLimit();
/// Returns max(send timeout, receive timeout) if it is non-zero, otherwise the maximum timeout
};

View File

@ -199,7 +199,7 @@ void SecureSocketImpl::connectSSL(bool performHandshake)
if (performHandshake && _pSocket->getBlocking())
{
int ret;
Poco::Timespan remaining_time = getMaxTimeout();
Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
do
{
RemainingTimeCounter counter(remaining_time);
@ -302,7 +302,7 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
return rc;
}
Poco::Timespan remaining_time = getMaxTimeout();
Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
do
{
RemainingTimeCounter counter(remaining_time);
@ -338,7 +338,7 @@ int SecureSocketImpl::receiveBytes(void* buffer, int length, int flags)
return rc;
}
Poco::Timespan remaining_time = getMaxTimeout();
Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
do
{
/// SSL record may consist of several TCP packets,
@ -372,7 +372,7 @@ int SecureSocketImpl::completeHandshake()
poco_check_ptr (_pSSL);
int rc;
Poco::Timespan remaining_time = getMaxTimeout();
Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
do
{
RemainingTimeCounter counter(remaining_time);
@ -453,18 +453,29 @@ X509* SecureSocketImpl::peerCertificate() const
return 0;
}
Poco::Timespan SecureSocketImpl::getMaxTimeout()
/// Returns the larger of the socket's receive and send timeouts.
/// If that value is zero, a large fixed fallback is returned instead, so that a zero timeout
/// behaves as "no timeout". The socket timeouts are read while holding _mutex.
Poco::Timespan SecureSocketImpl::getMaxTimeoutOrLimit()
{
std::lock_guard<std::recursive_mutex> lock(_mutex);
Poco::Timespan remaining_time = _pSocket->getReceiveTimeout();
Poco::Timespan send_timeout = _pSocket->getSendTimeout();
/// Take max(receive timeout, send timeout).
if (remaining_time < send_timeout)
remaining_time = send_timeout;
/// zero SO_SNDTIMEO/SO_RCVTIMEO works as no timeout, let's replicate this
///
/// NOTE: we cannot use INT64_MAX (std::numeric_limits<Poco::Timespan::TimeDiff>::max()),
/// since it will be later passed to poll() which accept int timeout, and
/// even though poll() accepts milliseconds and Timespan() accepts
/// microseconds, let's use smaller maximum value just to avoid some possible
/// issues, this should be enough anyway (it is ~24 days).
///
/// NOTE(review): if this Timespan constructor interprets its argument as microseconds
/// (as the note above says), INT_MAX is about 35.8 minutes; ~24 days would be
/// INT_MAX milliseconds. Confirm which limit is intended.
if (remaining_time == 0)
remaining_time = Poco::Timespan(std::numeric_limits<int>::max());
return remaining_time;
}
bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
{
if (remaining_time == 0)
return false;
std::lock_guard<std::recursive_mutex> lock(_mutex);
if (rc <= 0)
{
@ -475,9 +486,7 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
case SSL_ERROR_WANT_READ:
if (_pSocket->getBlocking())
{
/// Level-triggered mode of epoll_wait is used, so if SSL_read don't read all available data from socket,
/// epoll_wait returns true without waiting for new data even if remaining_time == 0
if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ) && remaining_time != 0)
if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ))
return true;
else
throw Poco::TimeoutException();
@ -486,13 +495,15 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
case SSL_ERROR_WANT_WRITE:
if (_pSocket->getBlocking())
{
/// The same as for SSL_ERROR_WANT_READ
if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE) && remaining_time != 0)
if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE))
return true;
else
throw Poco::TimeoutException();
}
break;
/// NOTE: POCO_EINTR is the same as SSL_ERROR_WANT_READ (at least in
/// OpenSSL), so this is likely dead code, but let's leave it for
/// compatibility with other implementations
case SSL_ERROR_SYSCALL:
return socketError == POCO_EAGAIN || socketError == POCO_EINTR;
default:

View File

@ -253,7 +253,7 @@ function run_tests()
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
set +e
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt

View File

@ -993,11 +993,11 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
- `TDigest`
Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.
[TDigest](https://github.com/tdunning/t-digest) sketches, which allow computing approximate percentiles (e.g. the 90th percentile) for numeric columns.
- `Uniq`
Estimate the number of distinct values of a column by HyperLogLog.
[HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches, which provide an estimate of how many distinct values a column contains.
## Column-level Settings {#column-level-settings}

View File

@ -6,23 +6,30 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.m
<SelfManaged />
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` field of the certificate is used to identify connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration.
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation makes it possible to uniquely authenticate an incoming connection. The `Common Name` field or the `subjectAltName` extension of the certificate is used to identify the connected user. This makes it possible to associate multiple certificates with the same user. Additionally, reissuing and revoking certificates does not affect the ClickHouse configuration.
To enable SSL certificate authentication, a list of `Common Name`'s for each ClickHouse user must be specified in the settings file `users.xml `:
To enable SSL certificate authentication, a list of `Common Name`'s or `Subject Alt Name`'s for each ClickHouse user must be specified in the settings file `users.xml`:
**Example**
```xml
<clickhouse>
<!-- ... -->
<users>
<user_name>
<user_name_1>
<ssl_certificates>
<common_name>host.domain.com:example_user</common_name>
<common_name>host.domain.com:example_user_dev</common_name>
<!-- More names -->
</ssl_certificates>
<!-- Other settings -->
</user_name>
</user_name_1>
<user_name_2>
<ssl_certificates>
<subject_alt_name>DNS:host.domain.com</subject_alt_name>
<!-- More names -->
</ssl_certificates>
<!-- Other settings -->
</user_name_2>
</users>
</clickhouse>
```

View File

@ -2536,7 +2536,7 @@ Possible values:
- 0 — Optimization disabled.
- 1 — Optimization enabled.
Default value: `0`.
Default value: `1`.
## optimize_trivial_count_query {#optimize-trivial-count-query}

View File

@ -1,24 +1,20 @@
---
slug: /en/sql-reference/data-types/json
slug: /en/sql-reference/data-types/object-data-type
sidebar_position: 26
sidebar_label: JSON
sidebar_label: Object Data Type
keywords: [object, data type]
---
# JSON
# Object Data Type
:::note
This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead.
This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON objects is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).
:::
Stores JavaScript Object Notation (JSON) documents in a single column.
`JSON` is an alias for `Object('json')`.
:::note
The JSON data type is an obsolete feature. Do not use it.
If you want to use it, set `allow_experimental_object_type = 1`.
:::
## Example
**Example 1**
@ -49,7 +45,7 @@ SELECT o.a, o.b.c, o.b.d[3] FROM json
**Example 2**
To be able to create an ordered `MergeTree` family table the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format:
To be able to create an ordered `MergeTree` family table, the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format:
```sql
CREATE TABLE logs
@ -69,7 +65,7 @@ FROM file('access.json.gz', JSONAsString)
## Displaying JSON columns
When displaying a `JSON` column ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can display the field names as well by setting `output_format_json_named_tuples_as_objects = 1`:
When displaying a `JSON` column, ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can also display the field names by setting `output_format_json_named_tuples_as_objects = 1`:
```sql
SET output_format_json_named_tuples_as_objects = 1
@ -83,4 +79,5 @@ SELECT * FROM json FORMAT JSONEachRow
## Related Content
- [Using JSON in ClickHouse](/docs/en/integrations/data-formats/json)
- [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json)

View File

@ -173,7 +173,7 @@ See function [substring](string-functions.md#substring).
## bitTest
Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left.
Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. Counting is right-to-left, starting at 0.
**Syntax**
@ -226,7 +226,7 @@ Result:
## bitTestAll
Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
Returns result of [logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. Counting is right-to-left, starting at 0.
The conjunction for bit-wise operations:
@ -289,7 +289,7 @@ Result:
## bitTestAny
Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. Counting is right-to-left, starting at 0.
The disjunction for bit-wise operations:

View File

@ -3860,3 +3860,138 @@ Result:
└───────────────┘
```
## transactionID
Returns the ID of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
:::note
This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
```
<clickhouse>
<allow_experimental_transactions>1</allow_experimental_transactions>
</clickhouse>
```
For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
:::
**Syntax**
```sql
transactionID()
```
**Returned value**
- Returns a tuple consisting of `start_csn`, `local_tid` and `host_id`. [Tuple](../data-types/tuple.md).
- `start_csn`: Global sequential number, the newest commit timestamp that was seen when this transaction began. [UInt64](../data-types/int-uint.md).
- `local_tid`: Local sequential number that is unique for each transaction started by this host within a specific start_csn. [UInt64](../data-types/int-uint.md).
- `host_id`: UUID of the host that has started this transaction. [UUID](../data-types/uuid.md).
**Example**
Query:
```sql
BEGIN TRANSACTION;
SELECT transactionID();
ROLLBACK;
```
Result:
```response
┌─transactionID()────────────────────────────────┐
│ (32,34,'0ee8b069-f2bb-4748-9eae-069c85b5252b') │
└────────────────────────────────────────────────┘
```
## transactionLatestSnapshot
Returns the newest snapshot (Commit Sequence Number) of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback) that is available for reading.
:::note
This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
```
<clickhouse>
<allow_experimental_transactions>1</allow_experimental_transactions>
</clickhouse>
```
For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
:::
**Syntax**
```sql
transactionLatestSnapshot()
```
**Returned value**
- Returns the latest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md)
**Example**
Query:
```sql
BEGIN TRANSACTION;
SELECT transactionLatestSnapshot();
ROLLBACK;
```
Result:
```response
┌─transactionLatestSnapshot()─┐
│ 32 │
└─────────────────────────────┘
```
## transactionOldestSnapshot
Returns the oldest snapshot (Commit Sequence Number) that is visible for some running [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
:::note
This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
```
<clickhouse>
<allow_experimental_transactions>1</allow_experimental_transactions>
</clickhouse>
```
For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
:::
**Syntax**
```sql
transactionOldestSnapshot()
```
**Returned value**
- Returns the oldest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md)
**Example**
Query:
```sql
BEGIN TRANSACTION;
SELECT transactionOldestSnapshot();
ROLLBACK;
```
Result:
```response
┌─transactionOldestSnapshot()─┐
│ 32 │
└─────────────────────────────┘
```

View File

@ -579,7 +579,6 @@ If the length of the UTF-8 byte sequence is different for upper and lower case o
Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
@ -736,7 +735,7 @@ concat(s1, s2, ...)
**Arguments**
At least one value of arbitrary type.
Values of arbitrary type.
Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments.

View File

@ -12,7 +12,7 @@ Syntax:
``` sql
ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'}]
[[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[VALID UNTIL datetime]
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]

View File

@ -152,7 +152,7 @@ SELECT * FROM test;
`MATERIALIZED expr`
Materialized expression. Values of such columns are always calculated, they cannot be specified in INSERT queries.
Materialized expression. Values of such columns are automatically calculated according to the specified materialized expression when rows are inserted. Values cannot be explicitly specified during `INSERT`s.
Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`.

View File

@ -12,7 +12,7 @@ Syntax:
``` sql
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[VALID UNTIL datetime]
[IN access_storage_type]

View File

@ -269,9 +269,9 @@ FROM s3(
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file. Type: `LowCardinalty(String)`.
- `_file` — Name of the file. Type: `LowCardinalty(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
- `_path` — Path to the file. Type: `LowCardinality(String)`. In case of an archive, shows the path in the format: "{path_to_archive}::{path_to_file_inside_archive}".
- `_file` — Name of the file. Type: `LowCardinality(String)`. In case of an archive, shows the name of the file inside the archive.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of an archive, shows the uncompressed size of the file inside the archive.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -80,8 +80,8 @@ These functions can be used only as a window function.
- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
- `rank()` - Rank the current row within its partition with gaps.
- `dense_rank()` - Rank the current row within its partition without gaps.
- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.
- `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the preceding row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned.
- `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the specified default value is used.
## Examples

View File

@ -3,23 +3,30 @@ slug: /ru/operations/external-authenticators/ssl-x509
---
# Аутентификация по сертификату SSL X.509 {#ssl-external-authentication}
[Опция 'strict'](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) включает обязательную проверку сертификатов входящих соединений в библиотеке `SSL`. В этом случае могут быть установлены только соединения, представившие действительный сертификат. Соединения с недоверенными сертификатами будут отвергнуты. Таким образом, проверка сертификата позволяет однозначно аутентифицировать входящее соединение. Идентификация пользователя осуществляется по полю `Common Name` сертификата. Это позволяет ассоциировать несколько сертификатов с одним и тем же пользователем. Дополнительно, перевыпуск и отзыв сертификата не требуют изменения конфигурации ClickHouse.
[Опция 'strict'](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) включает обязательную проверку сертификатов входящих соединений в библиотеке `SSL`. В этом случае могут быть установлены только соединения, представившие действительный сертификат. Соединения с недоверенными сертификатами будут отвергнуты. Таким образом, проверка сертификата позволяет однозначно аутентифицировать входящее соединение. Идентификация пользователя осуществляется по полю `Common Name` или `subjectAltName` сертификата. Это позволяет ассоциировать несколько сертификатов с одним и тем же пользователем. Дополнительно, перевыпуск и отзыв сертификата не требуют изменения конфигурации ClickHouse.
Для включения аутентификации по SSL сертификату, необходимо указать список `Common Name` для каждого пользователя ClickHouse в файле настройки `config.xml`:
Для включения аутентификации по SSL сертификату, необходимо указать список `Common Name` или `subjectAltName` для каждого пользователя ClickHouse в файле настройки `config.xml`:
**Example**
```xml
<clickhouse>
<!-- ... -->
<users>
<user_name>
<certificates>
<user_name_1>
<ssl_certificates>
<common_name>host.domain.com:example_user</common_name>
<common_name>host.domain.com:example_user_dev</common_name>
<!-- More names -->
</certificates>
</ssl_certificates>
<!-- Other settings -->
</user_name>
</user_name_1>
<user_name_2>
<ssl_certificates>
<subject_alt_name>DNS:host.domain.com</subject_alt_name>
<!-- More names -->
</ssl_certificates>
<!-- Other settings -->
</user_name_2>
</users>
</clickhouse>
```

View File

@ -2077,7 +2077,7 @@ SELECT * FROM test_table
- 0 — оптимизация отключена.
- 1 — оптимизация включена.
Значение по умолчанию: `0`.
Значение по умолчанию: `1`.
## optimize_trivial_count_query {#optimize-trivial-count-query}

View File

@ -13,7 +13,7 @@ sidebar_label: "Пользователь"
``` sql
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[DEFAULT ROLE role [,...]]
[DEFAULT DATABASE database | NONE]

View File

@ -3,6 +3,7 @@
#include <IO/ReadBufferFromString.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <Columns/ColumnNullable.h>
#include <Common/assert_cast.h>
#include <IO/ReadHelpers.h>
@ -47,9 +48,17 @@ Chunk ODBCSource::generate()
for (int idx = 0; idx < result.columns(); ++idx)
{
const auto & sample = description.sample_block.getByPosition(idx);
if (!result.is_null(idx))
{
if (columns[idx]->isNullable())
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
insertValue(column_nullable.getNestedColumn(), removeNullable(sample.type), description.types[idx].first, result, idx);
column_nullable.getNullMapData().emplace_back(0);
}
else
insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx);
}
else
insertDefaultValue(*columns[idx], *sample.column);
}

View File

@ -133,10 +133,6 @@
# include <Server/KeeperTCPHandlerFactory.h>
#endif
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#endif
#if USE_AZURE_BLOB_STORAGE
# include <azure/storage/common/internal/xml_wrapper.hpp>
# include <azure/core/diagnostics/logger.hpp>
@ -176,34 +172,10 @@ namespace ProfileEvents
namespace fs = std::filesystem;
#if USE_JEMALLOC
static bool jemallocOptionEnabled(const char *name)
{
bool value;
size_t size = sizeof(value);
if (mallctl(name, reinterpret_cast<void *>(&value), &size, /* newp= */ nullptr, /* newlen= */ 0))
throw Poco::SystemException("mallctl() failed");
return value;
}
#else
static bool jemallocOptionEnabled(const char *) { return false; }
#endif
int mainEntryClickHouseServer(int argc, char ** argv)
{
DB::Server app;
if (jemallocOptionEnabled("opt.background_thread"))
{
LOG_ERROR(&app.logger(),
"jemalloc.background_thread was requested, "
"however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, "
"and also background_thread is not compatible with ClickHouse watchdog "
"(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)");
}
/// Do not fork separate process from watchdog if we attached to terminal.
/// Otherwise it breaks gdb usage.
/// Can be overridden by environment variable (cannot use server config at this moment).

View File

@ -8,6 +8,7 @@
#include <Common/Exception.h>
#include <Common/SSHWrapper.h>
#include <Common/typeid_cast.h>
#include <Access/Common/SSLCertificateSubjects.h>
#include "config.h"
@ -238,7 +239,15 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<GSSAcceptorContext>(auth_data.getKerberosRealm());
case AuthenticationType::SSL_CERTIFICATE:
return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName());
for (SSLCertificateSubjects::Type type : {SSLCertificateSubjects::Type::CN, SSLCertificateSubjects::Type::SAN})
{
for (const auto & subject : auth_data.getSSLCertificateSubjects().at(type))
{
if (ssl_certificate_credentials->getSSLCertificateSubjects().at(type).contains(subject))
return true;
}
}
return false;
case AuthenticationType::SSH_KEY:
#if USE_SSH

View File

@ -15,6 +15,7 @@
#include <boost/algorithm/hex.hpp>
#include <boost/algorithm/string/case_conv.hpp>
#include <Access/Common/SSLCertificateSubjects.h>
#include "config.h"
#if USE_SSL
@ -107,7 +108,7 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
{
return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash)
&& (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm)
&& (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names)
&& (lhs.ssl_certificate_subjects == rhs.ssl_certificate_subjects)
#if USE_SSH
&& (lhs.ssh_keys == rhs.ssh_keys)
#endif
@ -277,11 +278,16 @@ String AuthenticationData::getSalt() const
return salt;
}
void AuthenticationData::setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_)
void AuthenticationData::setSSLCertificateSubjects(SSLCertificateSubjects && ssl_certificate_subjects_)
{
if (common_names_.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 'SSL CERTIFICATE' authentication type requires a non-empty list of common names.");
ssl_certificate_common_names = std::move(common_names_);
if (ssl_certificate_subjects_.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 'SSL CERTIFICATE' authentication type requires a non-empty list of subjects.");
ssl_certificate_subjects = std::move(ssl_certificate_subjects_);
}
/// Adds one certificate subject of the given kind (CN or SAN) to the subjects stored in this
/// authentication data. The string is moved into the underlying container.
void AuthenticationData::addSSLCertificateSubject(SSLCertificateSubjects::Type type_, String && subject_)
{
    auto & known_subjects = ssl_certificate_subjects;
    known_subjects.insert(type_, std::move(subject_));
}
std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
@ -339,7 +345,14 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
}
case AuthenticationType::SSL_CERTIFICATE:
{
for (const auto & name : getSSLCertificateCommonNames())
using SSLCertificateSubjects::Type::CN;
using SSLCertificateSubjects::Type::SAN;
const auto &subjects = getSSLCertificateSubjects();
SSLCertificateSubjects::Type cert_subject_type = !subjects.at(SAN).empty() ? SAN : CN;
node->ssl_cert_subject_type = toString(cert_subject_type);
for (const auto & name : getSSLCertificateSubjects().at(cert_subject_type))
node->children.push_back(std::make_shared<ASTLiteral>(name));
break;
@ -513,11 +526,9 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
}
else if (query.type == AuthenticationType::SSL_CERTIFICATE)
{
boost::container::flat_set<String> common_names;
auto ssl_cert_subject_type = parseSSLCertificateSubjectType(*query.ssl_cert_subject_type);
for (const auto & arg : args)
common_names.insert(checkAndGetLiteralArgument<String>(arg, "common_name"));
auth_data.setSSLCertificateCommonNames(std::move(common_names));
auth_data.addSSLCertificateSubject(ssl_cert_subject_type, checkAndGetLiteralArgument<String>(arg, "ssl_certificate_subject"));
}
else if (query.type == AuthenticationType::HTTP)
{

View File

@ -2,13 +2,14 @@
#include <Access/Common/AuthenticationType.h>
#include <Access/Common/HTTPAuthenticationScheme.h>
#include <Access/Common/SSLCertificateSubjects.h>
#include <Common/SSHWrapper.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/Access/ASTAuthenticationData.h>
#include <vector>
#include <base/types.h>
#include <boost/container/flat_set.hpp>
#include "config.h"
@ -58,8 +59,9 @@ public:
const String & getKerberosRealm() const { return kerberos_realm; }
void setKerberosRealm(const String & realm) { kerberos_realm = realm; }
const boost::container::flat_set<String> & getSSLCertificateCommonNames() const { return ssl_certificate_common_names; }
void setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_);
const SSLCertificateSubjects & getSSLCertificateSubjects() const { return ssl_certificate_subjects; }
void setSSLCertificateSubjects(SSLCertificateSubjects && ssl_certificate_subjects_);
void addSSLCertificateSubject(SSLCertificateSubjects::Type type_, String && subject_);
#if USE_SSH
const std::vector<SSHKey> & getSSHKeys() const { return ssh_keys; }
@ -96,7 +98,7 @@ private:
Digest password_hash;
String ldap_server_name;
String kerberos_realm;
boost::container::flat_set<String> ssl_certificate_common_names;
SSLCertificateSubjects ssl_certificate_subjects;
String salt;
#if USE_SSH
std::vector<SSHKey> ssh_keys;

View File

@ -0,0 +1,95 @@
#include <Access/Common/SSLCertificateSubjects.h>
#include <Common/Exception.h>
#if USE_SSL
#include <openssl/x509v3.h>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
#if USE_SSL
/// Collects the subjects of an X.509 certificate that can be used for authentication.
///
/// - The Common Name, when it is not empty, is stored under Type::CN.
/// - Every DNS or URI entry of the Subject Alternative Name extension is stored under Type::SAN,
///   prefixed with "DNS:" or "URI:" respectively. Other kinds of alternative names are skipped.
///
/// Returns an empty SSLCertificateSubjects if the certificate provides none of the above.
SSLCertificateSubjects extractSSLCertificateSubjects(const Poco::Net::X509Certificate & certificate)
{
    SSLCertificateSubjects subjects;

    /// commonName() yields a temporary string, so fetch it once and move it into the set
    /// instead of building it twice (once for the check and once for the insertion).
    if (String common_name = certificate.commonName(); !common_name.empty())
        subjects.insert(SSLCertificateSubjects::Type::CN, std::move(common_name));

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wused-but-marked-unused"
    /// The stack returned by X509_get_ext_d2i() is released with GENERAL_NAMES_free() when `cert_names`
    /// goes out of scope. It is held as void * and cast back where the typed stack is needed.
    auto stackof_general_name_deleter = [](void * ptr) { GENERAL_NAMES_free(static_cast<STACK_OF(GENERAL_NAME) *>(ptr)); };
    std::unique_ptr<void, decltype(stackof_general_name_deleter)> cert_names(
        X509_get_ext_d2i(const_cast<X509 *>(certificate.certificate()), NID_subject_alt_name, nullptr, nullptr),
        stackof_general_name_deleter);

    /// A null pointer here means there is nothing to iterate over (e.g. no SAN extension).
    if (STACK_OF(GENERAL_NAME) * names = static_cast<STACK_OF(GENERAL_NAME) *>(cert_names.get()))
    {
        for (int i = 0; i < sk_GENERAL_NAME_num(names); ++i)
        {
            const GENERAL_NAME * name = sk_GENERAL_NAME_value(names, i);

            /// Only DNS and URI names are supported; both are read through the `ia5` member.
            if (name->type == GEN_DNS || name->type == GEN_URI)
            {
                const char * data = reinterpret_cast<const char *>(ASN1_STRING_get0_data(name->d.ia5));
                std::size_t len = ASN1_STRING_length(name->d.ia5);
                std::string subject = (name->type == GEN_DNS ? "DNS:" : "URI:") + std::string(data, len);
                subjects.insert(SSLCertificateSubjects::Type::SAN, std::move(subject));
            }
        }
    }
#pragma clang diagnostic pop

    return subjects;
}
#endif
void SSLCertificateSubjects::insert(const String & subject_type_, String && subject)
{
insert(parseSSLCertificateSubjectType(subject_type_), std::move(subject));
}
/// Inserts a subject into the set that corresponds to the given kind.
/// The enumerator value is used as the index into `subjects`.
void SSLCertificateSubjects::insert(Type subject_type_, String && subject)
{
    const auto slot = static_cast<size_t>(subject_type_);
    subjects[slot].insert(std::move(subject));
}
/// Converts the textual name of a subject kind into the enum value.
/// Accepts exactly "CN" and "SAN" (case-sensitive); throws BAD_ARGUMENTS otherwise.
SSLCertificateSubjects::Type parseSSLCertificateSubjectType(const String & type_)
{
    using Type = SSLCertificateSubjects::Type;

    if (type_ == "SAN")
        return Type::SAN;

    if (type_ == "CN")
        return Type::CN;

    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown SSL Certificate Subject Type: {}", type_);
}
/// Returns the textual name of a subject kind: "CN" or "SAN".
String toString(SSLCertificateSubjects::Type type_)
{
    using Type = SSLCertificateSubjects::Type;

    /// Deliberately a switch without a default branch, so that every enumerator is listed explicitly.
    switch (type_)
    {
        case Type::SAN:
            return "SAN";
        case Type::CN:
            return "CN";
    }
}
/// Two subject collections are equal when both their CN sets and their SAN sets are equal.
bool operator==(const SSLCertificateSubjects & lhs, const SSLCertificateSubjects & rhs)
{
    using Type = SSLCertificateSubjects::Type;

    const bool same_common_names = !(lhs.at(Type::CN) != rhs.at(Type::CN));
    if (!same_common_names)
        return false;

    const bool same_alt_names = !(lhs.at(Type::SAN) != rhs.at(Type::SAN));
    return same_alt_names;
}
}

View File

@ -0,0 +1,48 @@
#pragma once
#include "config.h"
#include <array>
#include <base/types.h>
#include <boost/container/flat_set.hpp>
#if USE_SSL
# include <Poco/Net/X509Certificate.h>
#endif
namespace DB
{
/// Subjects of an SSL certificate grouped by kind: Common Name (CN) and Subject Alternative Name (SAN).
/// Each kind has its own set of strings.
class SSLCertificateSubjects
{
public:
    using container = boost::container::flat_set<String>;

    enum class Type
    {
        CN,
        SAN
    };

private:
    /// One set per Type, indexed by the enumerator's value.
    /// The array size is derived from Type::SAN, so SAN has to remain the last enumerator.
    std::array<container, static_cast<size_t>(Type::SAN) + 1> subjects;

public:
    /// Returns the set of subjects of the given kind.
    const container & at(Type type_) const { return subjects[static_cast<size_t>(type_)]; }

    /// Returns true when no subject of any kind is stored.
    /// Const-qualified because it only inspects the sets; this makes it callable on const objects
    /// and through const references.
    bool empty() const
    {
        for (const auto & subject_list : subjects)
        {
            if (!subject_list.empty())
                return false;
        }
        return true;
    }

    /// Inserts a subject; the kind is given either by name ("CN" / "SAN") or as an enum value.
    void insert(const String & subject_type_, String && subject);
    void insert(Type type_, String && subject);

    friend bool operator==(const SSLCertificateSubjects & lhs, const SSLCertificateSubjects & rhs);
};
String toString(SSLCertificateSubjects::Type type_);
SSLCertificateSubjects::Type parseSSLCertificateSubjectType(const String & type_);
#if USE_SSL
SSLCertificateSubjects extractSSLCertificateSubjects(const Poco::Net::X509Certificate & certificate);
#endif
}

View File

@ -1,7 +1,7 @@
#include <Access/Credentials.h>
#include <Access/Common/SSLCertificateSubjects.h>
#include <Common/Exception.h>
namespace DB
{
@ -48,18 +48,18 @@ void AlwaysAllowCredentials::setUserName(const String & user_name_)
user_name = user_name_;
}
SSLCertificateCredentials::SSLCertificateCredentials(const String & user_name_, const String & common_name_)
SSLCertificateCredentials::SSLCertificateCredentials(const String & user_name_, SSLCertificateSubjects && subjects_)
: Credentials(user_name_)
, common_name(common_name_)
/// `subjects_` is a named rvalue reference, i.e. an lvalue inside the constructor: without std::move
/// the member would be copy-constructed and every subject set duplicated.
, certificate_subjects(std::move(subjects_))
{
/// Certificate credentials are complete as soon as they are constructed.
is_ready = true;
}
const String & SSLCertificateCredentials::getCommonName() const
const SSLCertificateSubjects & SSLCertificateCredentials::getSSLCertificateSubjects() const
{
if (!isReady())
throwNotReady();
return common_name;
return certificate_subjects;
}
BasicCredentials::BasicCredentials()

View File

@ -1,6 +1,8 @@
#pragma once
#include <base/types.h>
#include <boost/container/flat_set.hpp>
#include <Access/Common/SSLCertificateSubjects.h>
#include <memory>
#include "config.h"
@ -42,11 +44,11 @@ class SSLCertificateCredentials
: public Credentials
{
public:
explicit SSLCertificateCredentials(const String & user_name_, const String & common_name_);
const String & getCommonName() const;
explicit SSLCertificateCredentials(const String & user_name_, SSLCertificateSubjects && subjects_);
const SSLCertificateSubjects & getSSLCertificateSubjects() const;
private:
String common_name;
SSLCertificateSubjects certificate_subjects;
};
class BasicCredentials

View File

@ -1,4 +1,5 @@
#include <Access/UsersConfigAccessStorage.h>
#include <Access/Common/SSLCertificateSubjects.h>
#include <Access/Quota.h>
#include <Access/RowPolicy.h>
#include <Access/User.h>
@ -194,18 +195,23 @@ namespace
/// Fill list of allowed certificates.
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(certificates_config, keys);
boost::container::flat_set<String> common_names;
for (const String & key : keys)
{
if (key.starts_with("common_name"))
{
String value = config.getString(certificates_config + "." + key);
common_names.insert(std::move(value));
user->auth_data.addSSLCertificateSubject(SSLCertificateSubjects::Type::CN, std::move(value));
}
else if (key.starts_with("subject_alt_name"))
{
String value = config.getString(certificates_config + "." + key);
if (value.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected ssl_certificates.subject_alt_name to not be empty");
user->auth_data.addSSLCertificateSubject(SSLCertificateSubjects::Type::SAN, std::move(value));
}
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown certificate pattern type: {}", key);
}
user->auth_data.setSSLCertificateCommonNames(std::move(common_names));
}
else if (has_ssh_keys)
{

View File

@ -9,6 +9,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -164,32 +165,15 @@ private:
auto aggregate_function_clone = aggregate_function->clone();
auto & aggregate_function_clone_typed = aggregate_function_clone->as<FunctionNode &>();
aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument };
resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name);
resolveAggregateFunctionNodeByName(aggregate_function_clone_typed, result_aggregate_function_name);
arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone);
resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName());
resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext());
return arithmetic_function_clone;
}
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
{
auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get(
aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
};
}

View File

@ -11,6 +11,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -18,19 +19,18 @@ namespace DB
namespace
{
class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor<ComparisonTupleEliminationPassVisitor>
class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitorWithContext<ComparisonTupleEliminationPassVisitor>
{
public:
explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_)
: context(std::move(context_))
{}
using Base = InDepthQueryTreeVisitorWithContext<ComparisonTupleEliminationPassVisitor>;
using Base::Base;
/// A child is visited unless it is a table function node.
static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child)
{
    const bool is_table_function = child->getNodeType() == QueryTreeNodeType::TABLE_FUNCTION;
    return !is_table_function;
}
void visitImpl(QueryTreeNodePtr & node) const
void enterImpl(QueryTreeNodePtr & node) const
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
@ -171,13 +171,13 @@ private:
{
auto result_function = std::make_shared<FunctionNode>("and");
result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions);
resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName());
resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), getContext());
if (comparison_function_name == "notEquals")
{
auto not_function = std::make_shared<FunctionNode>("not");
not_function->getArguments().getNodes().push_back(std::move(result_function));
resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName());
resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), getContext());
result_function = std::move(not_function);
}
@ -197,18 +197,10 @@ private:
comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument));
comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument));
resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName());
resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), getContext());
return comparison_function;
}
/// Re-resolves `function_node` as the ordinary function named `function_name`,
/// using the context stored in this visitor and the node's current argument columns.
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
    const auto function_builder = FunctionFactory::instance().get(function_name, context);
    auto resolved_function = function_builder->build(function_node.getArgumentColumns());
    function_node.resolveAsFunction(std::move(resolved_function));
}
ContextPtr context;
};
}

View File

@ -9,6 +9,7 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -77,11 +78,9 @@ public:
/// Replace `countDistinct` of initial query into `count`
auto result_type = function_node->getResultType();
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
}
};

View File

@ -4,6 +4,7 @@
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeVariant.h>
#include <Storages/IStorage.h>
@ -16,6 +17,9 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/TableNode.h>
#include <Analyzer/TableFunctionNode.h>
#include <Analyzer/Utils.h>
#include <Analyzer/JoinNode.h>
namespace DB
{
@ -23,211 +27,426 @@ namespace DB
namespace
{
class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>
/// Everything a transformer needs to know about the column that is the first
/// argument of the function being rewritten.
/// NOTE: the struct is aggregate-initialized positionally, so member order matters.
struct ColumnContext
{
/// Name and type of the column argument.
NameAndTypePair column;
/// Source of the column; reused as the source of the created subcolumn nodes.
QueryTreeNodePtr column_source;
/// Context passed to function resolution when a transformer re-resolves a function.
ContextPtr context;
};
/// Signature of a rewrite: receives the node slot (may be replaced as a whole),
/// the function node stored in that slot (may be re-resolved in place) and the column context.
using NodeToSubcolumnTransformer = std::function<void(QueryTreeNodePtr &, FunctionNode &, ColumnContext &)>;
/// Rewrites `length(argument)` into a read of the `argument.size0` subcolumn (UInt64).
/// Registered for both Array and Map arguments.
void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx)
{
    auto size_subcolumn_name = ctx.column.name + ".size0";
    NameAndTypePair size_subcolumn{size_subcolumn_name, std::make_shared<DataTypeUInt64>()};

    node = std::make_shared<ColumnNode>(size_subcolumn, ctx.column_source);
}
/// Rewrites emptiness checks into a comparison of the `size0` subcolumn with zero:
///   positive == true:  `empty(argument)`    -> `equals(argument.size0, 0)`
///   positive == false: `notEmpty(argument)` -> `notEquals(argument.size0, 0)`
/// Registered for both Array and Map arguments. The function node is re-resolved in place.
template <bool positive>
void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
{
    NameAndTypePair size_subcolumn{ctx.column.name + ".size0", std::make_shared<DataTypeUInt64>()};

    QueryTreeNodePtr size_column_node = std::make_shared<ColumnNode>(size_subcolumn, ctx.column_source);
    QueryTreeNodePtr zero_node = std::make_shared<ConstantNode>(static_cast<UInt64>(0));

    auto & arguments = function_node.getArguments().getNodes();
    arguments.clear();
    arguments.push_back(std::move(size_column_node));
    arguments.push_back(std::move(zero_node));

    const String comparison_name = positive ? "equals" : "notEquals";
    resolveOrdinaryFunctionNodeByName(function_node, comparison_name, ctx.context);
}
/// Maps the constant second argument of `tupleElement` to a tuple subcolumn name:
/// a String literal is used as is, a UInt64 literal is translated through
/// `getNameByPosition`. Any other literal type yields an empty string.
String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple)
{
    const auto literal_type = value.getType();

    if (literal_type == Field::Types::String)
        return value.get<const String &>();

    if (literal_type != Field::Types::UInt64)
        return "";

    return data_type_tuple.getNameByPosition(value.get<UInt64>());
}
/// Maps the constant second argument of `variantElement` to a variant subcolumn name:
/// only a String literal is accepted; any other literal type yields an empty string.
String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &)
{
    const bool is_string_literal = value.getType() == Field::Types::String;
    if (!is_string_literal)
        return "";

    return value.get<const String &>();
}
/// Rewrites element access into a subcolumn read:
///   `tupleElement(tuple_argument, string_literal | integer_literal)` -> `tuple_argument.column_name`
///   `variantElement(variant_argument, string_literal)`               -> `variant_argument.column_name`
/// `DataType` is the concrete type of the column argument (DataTypeTuple or DataTypeVariant).
/// The node is left untouched when there are not exactly two arguments, when the second
/// argument is not a constant, or when no subcolumn name can be derived from it.
template <typename DataType>
void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
{
    auto & arguments = function_node.getArguments().getNodes();
    const bool has_two_arguments = arguments.size() == 2;
    if (!has_two_arguments)
        return;

    const auto * element_literal = arguments[1]->as<ConstantNode>();
    if (element_literal == nullptr)
        return;

    const auto & concrete_type = assert_cast<const DataType &>(*ctx.column.type);
    const auto element_name = getSubcolumnNameForElement(element_literal->getValue(), concrete_type);
    if (element_name.empty())
        return;

    /// The result type is read before `node` is reassigned.
    NameAndTypePair subcolumn{ctx.column.name + "." + element_name, function_node.getResultType()};
    node = std::make_shared<ColumnNode>(subcolumn, ctx.column_source);
}
/// Dispatch table of the supported rewrites, keyed by
/// (type id of the column that is the first argument, function name).
/// A transformer either replaces `node` with a subcolumn ColumnNode or rewrites the
/// arguments of `function_node` and re-resolves it in place.
/// The table is only looked up (`contains` / `find`) and never modified, hence const.
const std::map<std::pair<TypeIndex, String>, NodeToSubcolumnTransformer> node_transformers =
{
    {
        {TypeIndex::Array, "length"}, optimizeFunctionLength,
    },
    {
        {TypeIndex::Array, "empty"}, optimizeFunctionEmpty<true>,
    },
    {
        {TypeIndex::Array, "notEmpty"}, optimizeFunctionEmpty<false>,
    },
    {
        {TypeIndex::Map, "length"}, optimizeFunctionLength,
    },
    {
        {TypeIndex::Map, "empty"}, optimizeFunctionEmpty<true>,
    },
    {
        {TypeIndex::Map, "notEmpty"}, optimizeFunctionEmpty<false>,
    },
    {
        {TypeIndex::Map, "mapKeys"},
        [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
        {
            /// Replace `mapKeys(map_argument)` with `map_argument.keys`
            NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()};
            node = std::make_shared<ColumnNode>(column, ctx.column_source);
        },
    },
    {
        {TypeIndex::Map, "mapValues"},
        [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
        {
            /// Replace `mapValues(map_argument)` with `map_argument.values`
            NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()};
            node = std::make_shared<ColumnNode>(column, ctx.column_source);
        },
    },
    {
        {TypeIndex::Map, "mapContains"},
        [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
        {
            /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
            const auto & data_type_map = assert_cast<const DataTypeMap &>(*ctx.column.type);

            NameAndTypePair column{ctx.column.name + ".keys", std::make_shared<DataTypeArray>(data_type_map.getKeyType())};
            auto & function_arguments_nodes = function_node.getArguments().getNodes();

            /// Only the first argument is replaced; the looked-up key stays as is.
            auto has_function_argument = std::make_shared<ColumnNode>(column, ctx.column_source);
            function_arguments_nodes[0] = std::move(has_function_argument);

            resolveOrdinaryFunctionNodeByName(function_node, "has", ctx.context);
        },
    },
    {
        {TypeIndex::Nullable, "count"},
        [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
        {
            /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))`
            NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
            auto & function_arguments_nodes = function_node.getArguments().getNodes();

            auto new_column_node = std::make_shared<ColumnNode>(column, ctx.column_source);
            auto function_node_not = std::make_shared<FunctionNode>("not");

            function_node_not->getArguments().getNodes().push_back(std::move(new_column_node));
            resolveOrdinaryFunctionNodeByName(*function_node_not, "not", ctx.context);

            function_arguments_nodes = {std::move(function_node_not)};
            resolveAggregateFunctionNodeByName(function_node, "sum");
        },
    },
    {
        {TypeIndex::Nullable, "isNull"},
        [](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx)
        {
            /// Replace `isNull(nullable_argument)` with `nullable_argument.null`
            NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
            node = std::make_shared<ColumnNode>(column, ctx.column_source);
        },
    },
    {
        {TypeIndex::Nullable, "isNotNull"},
        [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
        {
            /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)`
            NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
            auto & function_arguments_nodes = function_node.getArguments().getNodes();

            function_arguments_nodes = {std::make_shared<ColumnNode>(column, ctx.column_source)};
            resolveOrdinaryFunctionNodeByName(function_node, "not", ctx.context);
        },
    },
    {
        {TypeIndex::Tuple, "tupleElement"}, optimizeTupleOrVariantElement<DataTypeTuple>,
    },
    {
        {TypeIndex::Variant, "variantElement"}, optimizeTupleOrVariantElement<DataTypeVariant>,
    },
};
std::tuple<FunctionNode *, ColumnNode *, TableNode *> getTypedNodesForOptimization(const QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return {};
auto & function_arguments_nodes = function_node->getArguments().getNodes();
if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2)
return {};
auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
if (!first_argument_column_node || first_argument_column_node->getColumnName() == "__grouping_set")
return {};
auto column_source = first_argument_column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return {};
const auto & storage = table_node->getStorage();
const auto & storage_snapshot = table_node->getStorageSnapshot();
auto column = first_argument_column_node->getColumn();
if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata))
return {};
auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name);
if (!column_in_table || !column_in_table->type->equals(*column.type))
return {};
return std::make_tuple(function_node, first_argument_column_node, table_node);
}
/// First pass collects info about identifiers to determine which identifiers are allowed to optimize.
class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>;
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>;
using Base::Base;
void enterImpl(const QueryTreeNodePtr & node)
{
if (!getSettings().optimize_functions_to_subcolumns)
return;
if (auto * table_node = node->as<TableNode>())
{
enterImpl(*table_node);
return;
}
if (auto * column_node = node->as<ColumnNode>())
{
enterImpl(*column_node);
return;
}
auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node);
if (function_node && first_argument_node && table_node)
{
enterImpl(*function_node, *first_argument_node, *table_node);
return;
}
if (const auto * join_node = node->as<JoinNode>())
{
can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().join_use_nulls;
return;
}
if (const auto * query_node = node->as<QueryNode>())
{
if (query_node->isGroupByWithCube() || query_node->isGroupByWithRollup() || query_node->isGroupByWithGroupingSets())
can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().group_by_use_nulls;
return;
}
}
std::unordered_set<Identifier> getIdentifiersToOptimize() const
{
if (can_wrap_result_columns_with_nullable)
{
/// Do not optimize if we have JOIN with setting join_use_null.
/// Do not optimize if we have GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with setting group_by_use_nulls.
/// It may change the behaviour if subcolumn can be converted
/// to Nullable while the original column cannot (e.g. for Array type).
return {};
}
/// Do not optimize if full column is requested in other context.
/// It doesn't make sense because it doesn't reduce amount of read data
/// and optimized functions are not computation heavy. But introducing
/// new identifier complicates query analysis and may break it.
///
/// E.g. query:
/// SELECT n FROM table GROUP BY n HAVING isNotNull(n)
/// may be optimized to incorrect query:
/// SELECT n FROM table GROUP BY n HAVING not(n.null)
/// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys)
///
/// Do not optimize index columns (primary, min-max, secondary),
/// because otherwise analysis of indexes may be broken.
/// TODO: handle subcolumns in index analysis.
std::unordered_set<Identifier> identifiers_to_optimize;
for (const auto & [identifier, count] : optimized_identifiers_count)
{
if (all_key_columns.contains(identifier))
continue;
auto it = identifiers_count.find(identifier);
if (it != identifiers_count.end() && it->second == count)
identifiers_to_optimize.insert(identifier);
}
return identifiers_to_optimize;
}
private:
/// Columns (qualified by full table name) required for primary key, partition key
/// or secondary indices of the visited tables. They are excluded from optimization.
std::unordered_set<Identifier> all_key_columns;
/// Number of occurrences of every column identifier in the query tree.
std::unordered_map<Identifier, UInt64> identifiers_count;
/// Number of occurrences of every column identifier as the first argument of a function
/// for which a transformer is registered.
std::unordered_map<Identifier, UInt64> optimized_identifiers_count;
/// Full names of the tables already seen by enterImpl(const TableNode &).
NameSet processed_tables;
/// Set when the query has a JOIN with join_use_nulls or
/// GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with group_by_use_nulls.
bool can_wrap_result_columns_with_nullable = false;
/// Collects the key columns of `table_node` into `all_key_columns`: the columns required
/// for the primary key, for the partition key and for every secondary index expression.
/// Each column is stored as an identifier qualified by the full table name.
void enterImpl(const TableNode & table_node)
{
    auto table_name = table_node.getStorage()->getStorageID().getFullTableName();

    /// `emplace(...).second` is true only when the name was newly inserted, i.e. on the
    /// first occurrence of the table. Return only if the table was processed before;
    /// otherwise the key columns of a table that occurs once would never be collected
    /// and index columns would wrongly become eligible for optimization.
    if (!processed_tables.emplace(table_name).second)
        return;

    auto add_key_columns = [&](const auto & key_columns)
    {
        for (const auto & column_name : key_columns)
        {
            Identifier identifier({table_name, column_name});
            all_key_columns.insert(identifier);
        }
    };

    const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata;

    const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey();
    const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey();

    add_key_columns(primary_key_columns);
    add_key_columns(partition_key_columns);

    for (const auto & index : metadata_snapshot->getSecondaryIndices())
    {
        const auto & index_columns = index.expression->getRequiredColumns();
        add_key_columns(index_columns);
    }
}
void enterImpl(const ColumnNode & column_node)
{
if (column_node.getColumnName() == "__grouping_set")
return;
auto column_source = column_node.getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return;
auto table_name = table_node->getStorage()->getStorageID().getFullTableName();
Identifier qualified_name({table_name, column_node.getColumnName()});
++identifiers_count[qualified_name];
}
void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node)
{
/// For queries with FINAL converting function to subcolumn may alter
/// special merging algorithms and produce wrong result of query.
if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal())
return;
const auto & column = first_argument_column_node.getColumn();
auto table_name = table_node.getStorage()->getStorageID().getFullTableName();
Identifier qualified_name({table_name, column.name});
if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()}))
++optimized_identifiers_count[qualified_name];
}
};
/// Second pass optimizes functions to subcolumns for allowed identifiers.
class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>
{
private:
std::unordered_set<Identifier> identifiers_to_optimize;
public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>;
using Base::Base;
/// Creates the second-pass visitor.
/// `context_` is forwarded to the base visitor; `identifiers_to_optimize_` is the set of
/// column identifiers for which rewriting is allowed (both are taken by value and moved in).
FunctionToSubcolumnsVisitorSecondPass(ContextPtr context_, std::unordered_set<Identifier> identifiers_to_optimize_)
: Base(std::move(context_)), identifiers_to_optimize(std::move(identifiers_to_optimize_))
{
}
void enterImpl(QueryTreeNodePtr & node) const
{
if (!getSettings().optimize_functions_to_subcolumns)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;
auto & function_arguments_nodes = function_node->getArguments().getNodes();
size_t function_arguments_nodes_size = function_arguments_nodes.size();
if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2)
return;
auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
if (!first_argument_column_node)
return;
if (first_argument_column_node->getColumnName() == "__grouping_set")
return;
auto column_source = first_argument_column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return;
const auto & storage = table_node->getStorage();
if (!storage->supportsSubcolumns())
auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node);
if (!function_node || !first_argument_column_node || !table_node)
return;
auto column = first_argument_column_node->getColumn();
WhichDataType column_type(column.type);
auto table_name = table_node->getStorage()->getStorageID().getFullTableName();
const auto & function_name = function_node->getFunctionName();
if (function_arguments_nodes_size == 1)
{
if (column_type.isArray())
{
if (function_name == "length")
{
/// Replace `length(array_argument)` with `array_argument.size0`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "empty")
{
/// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
function_arguments_nodes.clear();
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
resolveOrdinaryFunctionNode(*function_node, "equals");
}
else if (function_name == "notEmpty")
{
/// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
function_arguments_nodes.clear();
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
resolveOrdinaryFunctionNode(*function_node, "notEquals");
}
}
else if (column_type.isNullable())
{
if (function_name == "isNull")
{
/// Replace `isNull(nullable_argument)` with `nullable_argument.null`
column.name += ".null";
column.type = std::make_shared<DataTypeUInt8>();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "isNotNull")
{
/// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)`
column.name += ".null";
column.type = std::make_shared<DataTypeUInt8>();
function_arguments_nodes = {std::make_shared<ColumnNode>(column, column_source)};
resolveOrdinaryFunctionNode(*function_node, "not");
}
}
else if (column_type.isMap())
{
if (function_name == "mapKeys")
{
/// Replace `mapKeys(map_argument)` with `map_argument.keys`
column.name += ".keys";
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapValues")
{
/// Replace `mapValues(map_argument)` with `map_argument.values`
column.name += ".values";
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
}
}
else
{
const auto * second_argument_constant_node = function_arguments_nodes[1]->as<ConstantNode>();
if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node)
{
/** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)`
* with `tuple_argument.column_name`.
*/
const auto & tuple_element_constant_value = second_argument_constant_node->getValue();
const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType();
const auto & data_type_tuple = assert_cast<const DataTypeTuple &>(*column.type);
String subcolumn_name;
if (tuple_element_constant_value_type == Field::Types::String)
{
subcolumn_name = tuple_element_constant_value.get<const String &>();
}
else if (tuple_element_constant_value_type == Field::Types::UInt64)
{
auto tuple_column_index = tuple_element_constant_value.get<UInt64>();
subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index);
}
else
{
return;
}
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node)
{
/// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`.
const auto & variant_element_constant_value = second_argument_constant_node->getValue();
String subcolumn_name;
if (variant_element_constant_value.getType() != Field::Types::String)
Identifier qualified_name({table_name, column.name});
if (!identifiers_to_optimize.contains(qualified_name))
return;
subcolumn_name = variant_element_constant_value.get<const String &>();
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapContains" && column_type.isMap())
auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()});
if (transformer_it != node_transformers.end())
{
const auto & data_type_map = assert_cast<const DataTypeMap &>(*column.type);
/// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
column.name += ".keys";
column.type = std::make_shared<DataTypeArray>(data_type_map.getKeyType());
auto has_function_argument = std::make_shared<ColumnNode>(column, column_source);
function_arguments_nodes[0] = std::move(has_function_argument);
resolveOrdinaryFunctionNode(*function_node, "has");
ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), getContext()};
transformer_it->second(node, *function_node, ctx);
}
}
}
private:
/// Re-resolves `function_node` as the ordinary function named `function_name`,
/// using this visitor's context and the node's current argument columns.
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
    const auto function_builder = FunctionFactory::instance().get(function_name, getContext());
    auto resolved_function = function_builder->build(function_node.getArgumentColumns());
    function_node.resolveAsFunction(std::move(resolved_function));
}
};
}
/// Runs the pass over `query_tree_node`.
/// The first visitor collects which identifiers may be optimized; if there are none the
/// pass stops, otherwise the second visitor rewrites functions for those identifiers.
void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
/// NOTE(review): the two `FunctionToSubcolumnsVisitor` lines below look like the
/// pre-change (single visitor) side of the diff — confirm they are not part of the new code.
FunctionToSubcolumnsVisitor visitor(context);
visitor.visit(query_tree_node);
/// First pass: collect statistics about identifiers.
FunctionToSubcolumnsVisitorFirstPass first_visitor(context);
first_visitor.visit(query_tree_node);
auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize();
/// Nothing is allowed to be optimized, so the second traversal is skipped.
if (identifiers_to_optimize.empty())
return;
/// Second pass: `context` and the identifier set are no longer needed here, so both are moved.
FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize));
second_visitor.visit(query_tree_node);
}
}

View File

@ -6,6 +6,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Interpreters/Context.h>
#include <DataTypes/DataTypesNumber.h>
@ -47,25 +48,17 @@ public:
if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull())
{
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
}
else if (function_node->getFunctionName() == "sum" &&
first_argument_constant_literal.getType() == Field::Types::UInt64 &&
first_argument_constant_literal.get<UInt64>() == 1)
{
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
}
}
private:
/// Re-resolves `function_node` as the aggregate function `count`
/// with no argument types and no parameters.
static void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
    AggregateFunctionProperties properties;
    function_node.resolveAsAggregateFunction(
        AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties));
}
};
}

View File

@ -5,6 +5,7 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Common/DateLUT.h>
#include <Common/DateLUTImpl.h>

View File

@ -74,8 +74,7 @@ public:
new_arguments[1] = std::move(if_arguments_nodes[0]);
function_arguments_nodes = std::move(new_arguments);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If");
}
}
else if (first_const_node)
@ -104,27 +103,10 @@ public:
new_arguments[1] = std::move(not_function);
function_arguments_nodes = std::move(new_arguments);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If");
}
}
}
private:
/// Re-resolves `function_node` as its `-If` variant: the aggregate function whose name is
/// the node's current function name with the suffix "If", taking `argument_types`.
/// The nulls action and the parameters are carried over from the node.
static void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types)
{
    auto result_type = function_node.getResultType();

    const auto name_with_if = function_node.getFunctionName() + "If";
    /// Keep the current function alive while its parameters are being read.
    const auto current_aggregate_function = function_node.getAggregateFunction();

    AggregateFunctionProperties properties;
    auto resolved_aggregate_function = AggregateFunctionFactory::instance().get(
        name_with_if, function_node.getNullsAction(), argument_types, current_aggregate_function->getParameters(), properties);

    function_node.resolveAsAggregateFunction(std::move(resolved_aggregate_function));
}
};
}

View File

@ -73,23 +73,24 @@ public:
const auto lhs = std::make_shared<FunctionNode>("sum");
lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]);
resolveAsAggregateFunctionNode(*lhs, column_type);
resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName());
const auto rhs_count = std::make_shared<FunctionNode>("count");
rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]);
resolveAsAggregateFunctionNode(*rhs_count, column_type);
resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName());
const auto rhs = std::make_shared<FunctionNode>("multiply");
rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]);
rhs->getArguments().getNodes().push_back(rhs_count);
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext());
auto new_node = std::make_shared<FunctionNode>(Poco::toLower(func_plus_minus_node->getFunctionName()));
if (column_id == 0)
new_node->getArguments().getNodes() = {lhs, rhs};
else if (column_id == 1)
new_node->getArguments().getNodes() = {rhs, lhs};
resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName());
resolveOrdinaryFunctionNodeByName(*new_node, new_node->getFunctionName(), getContext());
if (!new_node)
return;
@ -100,28 +101,7 @@ public:
res = createCastFunction(res, function_node->getResultType(), getContext());
node = std::move(res);
}
private:
/// Re-resolves `function_node` as the ordinary function named `function_name`,
/// using this visitor's context and the node's current argument columns.
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
    const auto function_builder = FunctionFactory::instance().get(function_name, getContext());
    auto resolved_function = function_builder->build(function_node.getArgumentColumns());
    function_node.resolveAsFunction(std::move(resolved_function));
}
/// Resolves `function_node` as the aggregate function with the node's own function name,
/// taking a single argument of type `argument_type` and no parameters.
static void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type)
{
    const auto & requested_name = function_node.getFunctionName();

    AggregateFunctionProperties properties;
    const auto resolved_aggregate_function
        = AggregateFunctionFactory::instance().get(requested_name, NullsAction::EMPTY, {argument_type}, {}, properties);

    function_node.resolveAsAggregateFunction(resolved_aggregate_function);
}
};
}

View File

@ -5,6 +5,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Analyzer/Utils.h>
#include <Functions/FunctionFactory.h>
@ -65,7 +66,8 @@ public:
auto multiplier_node = function_node_arguments_nodes[0];
function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]);
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
resolveAggregateFunctionNodeByName(*function_node, "countIf");
if (constant_value_literal.get<UInt64>() != 1)
{
@ -115,7 +117,7 @@ public:
function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0];
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
resolveAggregateFunctionNodeByName(*function_node, "countIf");
if (if_true_condition_value != 1)
{
@ -144,7 +146,7 @@ public:
function_node_arguments_nodes[0] = std::move(not_function);
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
resolveAggregateFunctionNodeByName(*function_node, "countIf");
if (if_false_condition_value != 1)
{
@ -156,15 +158,6 @@ public:
}
private:
/// Re-resolves `function_node` as the aggregate function `countIf` taking a single
/// argument of type `argument_type`. The parameters are carried over from the
/// aggregate function the node is currently resolved to.
static void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
{
    /// Keep the current function alive while its parameters are being read.
    const auto current_aggregate_function = function_node.getAggregateFunction();

    AggregateFunctionProperties properties;
    auto count_if_function = AggregateFunctionFactory::instance().get(
        "countIf", NullsAction::EMPTY, {argument_type}, current_aggregate_function->getParameters(), properties);

    function_node.resolveAsAggregateFunction(std::move(count_if_function));
}
QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right)
{
auto multiply_function_node = std::make_shared<FunctionNode>("multiply");

View File

@ -7,6 +7,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB

View File

@ -7,6 +7,7 @@
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -184,11 +185,8 @@ public:
/// Replace uniq of initial query to count
if (match_subquery_with_distinct() || match_subquery_with_group_by())
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node->getArguments().getNodes().clear();
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
resolveAggregateFunctionNodeByName(*function_node, "count");
}
}
};

View File

@ -636,16 +636,16 @@ private:
bool has_function = false;
};
inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_node)
inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode & function_node, const String & function_name)
{
Array parameters;
for (const auto & param : function_node->getParameters())
for (const auto & param : function_node.getParameters())
{
auto * constant = param->as<ConstantNode>();
parameters.push_back(constant->getValue());
}
const auto & function_node_argument_nodes = function_node->getArguments().getNodes();
const auto & function_node_argument_nodes = function_node.getArguments().getNodes();
DataTypes argument_types;
argument_types.reserve(function_node_argument_nodes.size());
@ -655,7 +655,7 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties);
return AggregateFunctionFactory::instance().get(function_name, action, argument_types, parameters, properties);
}
}
@ -736,11 +736,11 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context)
{
if (name == "nothing" || name == "nothingUInt64" || name == "nothingNull")
return;
function_node->resolveAsAggregateFunction(resolveAggregateFunction(function_node));
function_node->resolveAsAggregateFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName()));
}
else if (function_node->isWindowFunction())
{
function_node->resolveAsWindowFunction(resolveAggregateFunction(function_node));
function_node->resolveAsWindowFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName()));
}
}
@ -793,6 +793,18 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty
return function_node;
}
void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context)
{
    /// Look up the overload resolver registered under `function_name` ...
    const auto resolver = FunctionFactory::instance().get(function_name, context);

    /// ... build the concrete function for the argument columns the node already has,
    /// and bind the result to the node.
    auto built_function = resolver->build(function_node.getArgumentColumns());
    function_node.resolveAsFunction(std::move(built_function));
}
void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name)
{
    /// The node keeps its own parameters and arguments; `function_name` only selects
    /// which aggregate function is looked up and bound to the node.
    function_node.resolveAsAggregateFunction(resolveAggregateFunction(function_node, function_name));
}
/** Returns:
* {_, false} - multiple sources
* {nullptr, true} - no sources (for constants)

View File

@ -112,6 +112,14 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node);
/// Wrap node into `_CAST` function
QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context);
/// Resolves function node as ordinary function with given name.
/// The function is looked up in the function factory by `function_name` (using `context`)
/// and built from the argument columns that the node already has.
void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context);
/// Resolves function node as aggregate function with given name.
/// Arguments and parameters are taken from the node; `function_name` only selects
/// which aggregate function is requested from the aggregate function factory.
/// Every parameter of the node is read as a constant node.
void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name);
/// Checks that node has only one source and returns it
QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node);

View File

@ -210,6 +210,7 @@ add_object_library(clickhouse_analyzer_passes Analyzer/Resolve)
add_object_library(clickhouse_planner Planner)
add_object_library(clickhouse_interpreters Interpreters)
add_object_library(clickhouse_interpreters_cache Interpreters/Cache)
add_object_library(clickhouse_interpreters_hash_join Interpreters/HashJoin)
add_object_library(clickhouse_interpreters_access Interpreters/Access)
add_object_library(clickhouse_interpreters_mysql Interpreters/MySQL)
add_object_library(clickhouse_interpreters_clusterproxy Interpreters/ClusterProxy)

View File

@ -1206,11 +1206,8 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
if (local_format_error)
std::rethrow_exception(local_format_error);
if (cancelled && is_interactive)
{
if (cancelled && is_interactive && !cancelled_printed.exchange(true))
output_stream << "Query was cancelled." << std::endl;
cancelled_printed = true;
}
}
@ -1326,7 +1323,7 @@ void ClientBase::onEndOfStream()
if (is_interactive)
{
if (cancelled && !cancelled_printed)
if (cancelled && !cancelled_printed.exchange(true))
output_stream << "Query was cancelled." << std::endl;
else if (!written_first_block)
output_stream << "Ok." << std::endl;

View File

@ -338,8 +338,8 @@ protected:
bool allow_repeated_settings = false;
bool allow_merge_tree_settings = false;
bool cancelled = false;
bool cancelled_printed = false;
std::atomic_bool cancelled = false;
std::atomic_bool cancelled_printed = false;
/// Unpacked descriptors and streams for the ease of use.
int in_fd = STDIN_FILENO;

View File

@ -1,8 +1,6 @@
#pragma once
#include <deque>
#include <type_traits>
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <optional>
@ -200,22 +198,18 @@ public:
*/
bool finish()
{
bool was_finished_before = false;
{
std::lock_guard lock(queue_mutex);
if (is_finished)
return true;
was_finished_before = is_finished;
is_finished = true;
}
pop_condition.notify_all();
push_condition.notify_all();
return was_finished_before;
return false;
}
/// Returns if queue is finished

View File

@ -447,14 +447,18 @@ The server successfully detected this situation and will download merged part fr
M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \
\
M(AzureGetObject, "Number of Azure API GetObject calls.") \
M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \
M(AzureUpload, "Number of Azure blob storage API Upload calls") \
M(AzureStageBlock, "Number of Azure blob storage API StageBlock calls") \
M(AzureCommitBlockList, "Number of Azure blob storage API CommitBlockList calls") \
M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \
M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \
M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \
M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \
\
M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \
M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \
M(DiskAzureUpload, "Number of Disk Azure blob storage API Upload calls") \
M(DiskAzureStageBlock, "Number of Disk Azure blob storage API StageBlock calls") \
M(DiskAzureCommitBlockList, "Number of Disk Azure blob storage API CommitBlockList calls") \
M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \
M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \
M(DiskAzureDeleteObjects, "Number of Disk Azure blob storage API DeleteObject(s) calls.") \
@ -611,6 +615,13 @@ The server successfully detected this situation and will download merged part fr
M(KeeperPacketsReceived, "Packets received by keeper server") \
M(KeeperRequestTotal, "Total requests number on keeper server") \
M(KeeperLatency, "Keeper latency") \
M(KeeperTotalElapsedMicroseconds, "Keeper total latency for a single request") \
M(KeeperProcessElapsedMicroseconds, "Keeper commit latency for a single request") \
M(KeeperPreprocessElapsedMicroseconds, "Keeper preprocessing latency for a single request") \
M(KeeperStorageLockWaitMicroseconds, "Time spent waiting for acquiring Keeper storage lock") \
M(KeeperCommitWaitElapsedMicroseconds, "Time spent waiting for certain log to be committed") \
M(KeeperBatchMaxCount, "Number of times the size of batch was limited by the amount") \
M(KeeperBatchMaxTotalSize, "Number of times the size of batch was limited by the total bytes size") \
M(KeeperCommits, "Number of successful commits") \
M(KeeperCommitsFailed, "Number of failed commits") \
M(KeeperSnapshotCreations, "Number of snapshots creations")\

View File

@ -9,7 +9,6 @@
#include <IO/ReadHelpers.h>
#include <fmt/format.h>
#include <Common/logger_useful.h>
#include <array>
namespace Coordination
@ -29,7 +28,7 @@ void ZooKeeperResponse::write(WriteBuffer & out) const
Coordination::write(buf.str(), out);
}
std::string ZooKeeperRequest::toString() const
std::string ZooKeeperRequest::toString(bool short_format) const
{
return fmt::format(
"XID = {}\n"
@ -37,7 +36,7 @@ std::string ZooKeeperRequest::toString() const
"Additional info:\n{}",
xid,
getOpNum(),
toStringImpl());
toStringImpl(short_format));
}
void ZooKeeperRequest::write(WriteBuffer & out) const
@ -60,7 +59,7 @@ void ZooKeeperSyncRequest::readImpl(ReadBuffer & in)
Coordination::read(path, in);
}
std::string ZooKeeperSyncRequest::toStringImpl() const
std::string ZooKeeperSyncRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format("path = {}", path);
}
@ -91,7 +90,7 @@ void ZooKeeperReconfigRequest::readImpl(ReadBuffer & in)
Coordination::read(version, in);
}
std::string ZooKeeperReconfigRequest::toStringImpl() const
std::string ZooKeeperReconfigRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format(
"joining = {}\nleaving = {}\nnew_members = {}\nversion = {}",
@ -145,7 +144,7 @@ void ZooKeeperAuthRequest::readImpl(ReadBuffer & in)
Coordination::read(data, in);
}
std::string ZooKeeperAuthRequest::toStringImpl() const
std::string ZooKeeperAuthRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format(
"type = {}\n"
@ -191,7 +190,7 @@ void ZooKeeperCreateRequest::readImpl(ReadBuffer & in)
is_sequential = true;
}
std::string ZooKeeperCreateRequest::toStringImpl() const
std::string ZooKeeperCreateRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format(
"path = {}\n"
@ -218,7 +217,7 @@ void ZooKeeperRemoveRequest::writeImpl(WriteBuffer & out) const
Coordination::write(version, out);
}
std::string ZooKeeperRemoveRequest::toStringImpl() const
std::string ZooKeeperRemoveRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format(
"path = {}\n"
@ -245,7 +244,7 @@ void ZooKeeperExistsRequest::readImpl(ReadBuffer & in)
Coordination::read(has_watch, in);
}
std::string ZooKeeperExistsRequest::toStringImpl() const
std::string ZooKeeperExistsRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format("path = {}", path);
}
@ -272,7 +271,7 @@ void ZooKeeperGetRequest::readImpl(ReadBuffer & in)
Coordination::read(has_watch, in);
}
std::string ZooKeeperGetRequest::toStringImpl() const
std::string ZooKeeperGetRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format("path = {}", path);
}
@ -303,7 +302,7 @@ void ZooKeeperSetRequest::readImpl(ReadBuffer & in)
Coordination::read(version, in);
}
std::string ZooKeeperSetRequest::toStringImpl() const
std::string ZooKeeperSetRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format(
"path = {}\n"
@ -334,7 +333,7 @@ void ZooKeeperListRequest::readImpl(ReadBuffer & in)
Coordination::read(has_watch, in);
}
std::string ZooKeeperListRequest::toStringImpl() const
std::string ZooKeeperListRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format("path = {}", path);
}
@ -356,7 +355,7 @@ void ZooKeeperFilteredListRequest::readImpl(ReadBuffer & in)
list_request_type = static_cast<ListRequestType>(read_request_type);
}
std::string ZooKeeperFilteredListRequest::toStringImpl() const
std::string ZooKeeperFilteredListRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format(
"path = {}\n"
@ -401,7 +400,7 @@ void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in)
Coordination::read(version, in);
}
std::string ZooKeeperSetACLRequest::toStringImpl() const
std::string ZooKeeperSetACLRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format("path = {}\nversion = {}", path, version);
}
@ -426,7 +425,7 @@ void ZooKeeperGetACLRequest::writeImpl(WriteBuffer & out) const
Coordination::write(path, out);
}
std::string ZooKeeperGetACLRequest::toStringImpl() const
std::string ZooKeeperGetACLRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format("path = {}", path);
}
@ -455,7 +454,7 @@ void ZooKeeperCheckRequest::readImpl(ReadBuffer & in)
Coordination::read(version, in);
}
std::string ZooKeeperCheckRequest::toStringImpl() const
std::string ZooKeeperCheckRequest::toStringImpl(bool /*short_format*/) const
{
return fmt::format("path = {}\nversion = {}", path, version);
}
@ -600,8 +599,11 @@ void ZooKeeperMultiRequest::readImpl(ReadBuffer & in)
}
}
std::string ZooKeeperMultiRequest::toStringImpl() const
std::string ZooKeeperMultiRequest::toStringImpl(bool short_format) const
{
if (short_format)
return fmt::format("Subrequests size = {}", requests.size());
auto out = fmt::memory_buffer();
for (const auto & request : requests)
{

View File

@ -63,12 +63,12 @@ struct ZooKeeperRequest : virtual Request
/// Writes length, xid, op_num, then the rest.
void write(WriteBuffer & out) const;
std::string toString() const;
std::string toString(bool short_format = false) const;
virtual void writeImpl(WriteBuffer &) const = 0;
virtual void readImpl(ReadBuffer &) = 0;
virtual std::string toStringImpl() const { return ""; }
virtual std::string toStringImpl(bool /*short_format*/) const { return ""; }
static std::shared_ptr<ZooKeeperRequest> read(ReadBuffer & in);
@ -98,7 +98,7 @@ struct ZooKeeperSyncRequest final : ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Sync; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; }
@ -123,7 +123,7 @@ struct ZooKeeperReconfigRequest final : ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Reconfig; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; }
@ -176,7 +176,7 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Auth; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; }
@ -229,7 +229,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest
OpNum getOpNum() const override { return not_exists ? OpNum::CreateIfNotExists : OpNum::Create; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; }
@ -266,7 +266,7 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Remove; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; }
@ -293,7 +293,7 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Exists; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; }
@ -320,7 +320,7 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Get; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; }
@ -347,7 +347,7 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Set; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; }
@ -375,7 +375,7 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::List; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; }
@ -395,7 +395,7 @@ struct ZooKeeperFilteredListRequest final : ZooKeeperListRequest
OpNum getOpNum() const override { return OpNum::FilteredList; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
size_t bytesSize() const override { return ZooKeeperListRequest::bytesSize() + sizeof(list_request_type); }
};
@ -428,7 +428,7 @@ struct ZooKeeperCheckRequest : CheckRequest, ZooKeeperRequest
OpNum getOpNum() const override { return not_exists ? OpNum::CheckNotExists : OpNum::Check; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; }
@ -469,7 +469,7 @@ struct ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::SetACL; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; }
@ -490,7 +490,7 @@ struct ZooKeeperGetACLRequest final : GetACLRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::GetACL; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; }
@ -516,7 +516,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override;
std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override;

View File

@ -169,6 +169,23 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const
writeText("async_replication=", buf);
write_bool(coordination_settings->async_replication);
writeText("latest_logs_cache_size_threshold=", buf);
write_int(coordination_settings->latest_logs_cache_size_threshold);
writeText("commit_logs_cache_size_threshold=", buf);
write_int(coordination_settings->commit_logs_cache_size_threshold);
writeText("disk_move_retries_wait_ms=", buf);
write_int(coordination_settings->disk_move_retries_wait_ms);
writeText("disk_move_retries_during_init=", buf);
write_int(coordination_settings->disk_move_retries_during_init);
writeText("log_slow_total_threshold_ms=", buf);
write_int(coordination_settings->log_slow_total_threshold_ms);
writeText("log_slow_cpu_threshold_ms=", buf);
write_int(coordination_settings->log_slow_cpu_threshold_ms);
writeText("log_slow_connection_operation_threshold_ms=", buf);
write_int(coordination_settings->log_slow_connection_operation_threshold_ms);
}
KeeperConfigurationAndSettingsPtr

View File

@ -58,7 +58,10 @@ struct Settings;
M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \
M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \
M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \
M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0)
M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \
M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this setting will be logged", 0) \
M(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this setting will be logged", 0) \
M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0)
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

View File

@ -150,12 +150,18 @@
M(S3PutObject) \
M(S3GetObject) \
\
M(AzureUploadPart) \
M(DiskAzureUploadPart) \
M(AzureUpload) \
M(DiskAzureUpload) \
M(AzureStageBlock) \
M(DiskAzureStageBlock) \
M(AzureCommitBlockList) \
M(DiskAzureCommitBlockList) \
M(AzureCopyObject) \
M(DiskAzureCopyObject) \
M(AzureDeleteObjects) \
M(DiskAzureDeleteObjects) \
M(AzureListObjects) \
M(DiskAzureListObjects) \
\
M(DiskS3DeleteObjects) \
M(DiskS3CopyObject) \
@ -238,6 +244,13 @@
M(KeeperPacketsReceived) \
M(KeeperRequestTotal) \
M(KeeperLatency) \
M(KeeperTotalElapsedMicroseconds) \
M(KeeperProcessElapsedMicroseconds) \
M(KeeperPreprocessElapsedMicroseconds) \
M(KeeperStorageLockWaitMicroseconds) \
M(KeeperCommitWaitElapsedMicroseconds) \
M(KeeperBatchMaxCount) \
M(KeeperBatchMaxTotalSize) \
M(KeeperCommits) \
M(KeeperCommitsFailed) \
M(KeeperSnapshotCreations) \

View File

@ -31,6 +31,13 @@ namespace CurrentMetrics
extern const Metric KeeperOutstandingRequets;
}
/// Profile event counters incremented from the request thread in this file.
/// They are only declared here (`extern`); the definitions live elsewhere.
namespace ProfileEvents
{
/// Time spent waiting for a previous write batch to be committed.
extern const Event KeeperCommitWaitElapsedMicroseconds;
/// Number of times a batch was limited by the number of requests in it.
extern const Event KeeperBatchMaxCount;
/// Number of times a batch was limited by its total size in bytes.
extern const Event KeeperBatchMaxTotalSize;
}
using namespace std::chrono_literals;
namespace DB
@ -119,6 +126,7 @@ void KeeperDispatcher::requestThread()
auto coordination_settings = configuration_and_settings->coordination_settings;
uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size;
size_t max_batch_size = coordination_settings->max_requests_batch_size;
/// The code below do a very simple thing: batch all write (quorum) requests into vector until
/// previous write batch is not finished or max_batch size achieved. The main complexity goes from
@ -188,7 +196,6 @@ void KeeperDispatcher::requestThread()
return false;
};
size_t max_batch_size = coordination_settings->max_requests_batch_size;
while (!shutdown_called && current_batch.size() < max_batch_size && !has_reconfig_request
&& current_batch_bytes_size < max_batch_bytes_size && try_get_request())
;
@ -225,6 +232,12 @@ void KeeperDispatcher::requestThread()
/// Process collected write requests batch
if (!current_batch.empty())
{
/// Count the batches that were cut short by one of the limits.
/// The collection loop above runs only while both values are strictly below their limits,
/// so after it a limit was hit when the value is no longer below it. Compare with `>=`:
/// the value is not guaranteed to land exactly on the limit (the byte size in particular
/// may end up above it), and an exact `==` check would miss those batches.
if (current_batch.size() >= max_batch_size)
    ProfileEvents::increment(ProfileEvents::KeeperBatchMaxCount, 1);
if (current_batch_bytes_size >= max_batch_bytes_size)
    ProfileEvents::increment(ProfileEvents::KeeperBatchMaxTotalSize, 1);
LOG_TRACE(log, "Processing requests batch, size: {}, bytes: {}", current_batch.size(), current_batch_bytes_size);
auto result = server->putRequestBatch(current_batch);
@ -243,6 +256,8 @@ void KeeperDispatcher::requestThread()
/// If we will execute read or reconfig next, we have to process result now
if (execute_requests_after_write)
{
Stopwatch watch;
SCOPE_EXIT(ProfileEvents::increment(ProfileEvents::KeeperCommitWaitElapsedMicroseconds, watch.elapsedMicroseconds()));
if (prev_result)
result_buf = forceWaitAndProcessResult(
prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write);
@ -319,19 +334,13 @@ void KeeperDispatcher::snapshotThread()
{
setThreadName("KeeperSnpT");
const auto & shutdown_called = keeper_context->isShutdownCalled();
while (!shutdown_called)
{
CreateSnapshotTask task;
if (!snapshots_queue.pop(task))
break;
while (snapshots_queue.pop(task))
{
try
{
auto snapshot_file_info = task.create_snapshot(std::move(task.snapshot), /*execute_only_cleanup=*/shutdown_called);
if (shutdown_called)
break;
if (!snapshot_file_info)
continue;

View File

@ -1,12 +1,14 @@
#include <atomic>
#include <cerrno>
#include <chrono>
#include <Coordination/KeeperDispatcher.h>
#include <Coordination/KeeperReconfiguration.h>
#include <Coordination/KeeperSnapshotManager.h>
#include <Coordination/KeeperStateMachine.h>
#include <Coordination/KeeperDispatcher.h>
#include <Coordination/KeeperStorage.h>
#include <Coordination/KeeperReconfiguration.h>
#include <Coordination/ReadBufferFromNuraftBuffer.h>
#include <Coordination/WriteBufferFromNuraftBuffer.h>
#include <Disks/DiskLocal.h>
#include <IO/ReadHelpers.h>
#include <base/defines.h>
#include <base/errnoToString.h>
@ -17,7 +19,6 @@
#include <Common/ZooKeeper/ZooKeeperCommon.h>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Common/logger_useful.h>
#include <Disks/DiskLocal.h>
namespace ProfileEvents
@ -31,6 +32,7 @@ namespace ProfileEvents
extern const Event KeeperSnapshotApplysFailed;
extern const Event KeeperReadSnapshot;
extern const Event KeeperSaveSnapshot;
extern const Event KeeperStorageLockWaitMicroseconds;
}
namespace DB
@ -151,6 +153,20 @@ void assertDigest(
}
}
/// Scoped lock over a `std::mutex` that also records how long the acquisition took.
/// The waiting time is added to the `KeeperStorageLockWaitMicroseconds` profile event.
/// The mutex is released when the guard is destroyed.
struct TSA_SCOPED_LOCKABLE LockGuardWithStats final
{
    std::unique_lock<std::mutex> lock;

    explicit LockGuardWithStats(std::mutex & mutex) TSA_ACQUIRE(mutex)
    {
        /// Start timing before the (possibly blocking) acquisition.
        Stopwatch wait_timer;
        std::unique_lock acquired(mutex);

        /// The mutex is held from here on; account for the time spent waiting for it.
        const auto waited_microseconds = wait_timer.elapsedMicroseconds();
        ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, waited_microseconds);

        /// Hand ownership to the member so the lock lives as long as the guard.
        lock = std::move(acquired);
    }

    ~LockGuardWithStats() TSA_RELEASE() = default;
};
}
nuraft::ptr<nuraft::buffer> KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data)
@ -272,7 +288,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig)
return true;
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
if (storage->isFinalized())
return false;
@ -302,7 +318,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session)
{
std::lock_guard _(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
KeeperStorage::ResponseForSession response = processReconfiguration(request_for_session);
if (!responses_queue.push(response))
{
@ -391,7 +407,7 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session))
return nullptr;
auto try_push = [this](const KeeperStorage::ResponseForSession& response)
auto try_push = [&](const KeeperStorage::ResponseForSession& response)
{
if (!responses_queue.push(response))
{
@ -400,6 +416,17 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
"Failed to push response with session id {} to the queue, probably because of shutdown",
response.session_id);
}
using namespace std::chrono;
/// Total latency = local wall-clock time minus the timestamp stored in the request.
/// The difference is computed as a signed value and clamped at zero: if the request
/// timestamp is ahead of the local clock, an unsigned subtraction would wrap around
/// to a huge number and every such request would be reported as slow.
const int64_t now_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
const int64_t elapsed_signed = now_ms - static_cast<int64_t>(request_for_session->time);
const uint64_t elapsed = elapsed_signed > 0 ? static_cast<uint64_t>(elapsed_signed) : 0;
if (elapsed > keeper_context->getCoordinationSettings()->log_slow_total_threshold_ms)
{
    LOG_INFO(
        log,
        "Total time to process a request took too long ({}ms).\nRequest info: {}",
        elapsed,
        request_for_session->request->toString(/*short_format=*/true));
}
};
try
@ -417,7 +444,7 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
response_for_session.session_id = -1;
response_for_session.response = response;
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
session_id = storage->getSessionID(session_id_request.session_timeout_ms);
LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms);
response->session_id = session_id;
@ -426,12 +453,13 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
else
{
if (op_num == Coordination::OpNum::Close)
{
std::lock_guard lock(request_cache_mutex);
parsed_request_cache.erase(request_for_session->session_id);
}
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
KeeperStorage::ResponsesForSessions responses_for_sessions
= storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid);
for (auto & response_for_session : responses_for_sessions)
@ -482,7 +510,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
}
{ /// deserialize and apply snapshot to storage
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
SnapshotDeserializationResult snapshot_deserialization_result;
if (latest_snapshot_ptr)
@ -534,7 +562,7 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
return;
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
storage->rollbackRequest(request_for_session.zxid, allow_missing);
}
@ -561,7 +589,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf);
CreateSnapshotTask snapshot_task;
{ /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking.
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot>(storage.get(), snapshot_meta_copy, getClusterConfig());
}
@ -569,7 +597,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup)
{
nuraft::ptr<std::exception> exception(nullptr);
bool ret = true;
bool ret = false;
if (!execute_only_cleanup)
{
try
@ -599,7 +627,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
else
{
auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot);
auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx());
auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(
*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx());
latest_snapshot_info = std::move(snapshot_info);
latest_snapshot_buf = std::move(snapshot_buf);
}
@ -612,18 +641,19 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
latest_snapshot_info->path);
}
}
ret = true;
}
catch (...)
{
ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreationsFailed);
LOG_TRACE(log, "Exception happened during snapshot");
tryLogCurrentException(log);
ret = false;
}
}
{
/// Destroy snapshot with lock
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
LOG_TRACE(log, "Clearing garbage after snapshot");
/// Turn off "snapshot mode" and clear the outdated part of the storage state
storage->clearGarbageAfterSnapshot();
@ -764,7 +794,7 @@ int KeeperStateMachine::read_logical_snp_obj(
void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session)
{
/// Pure local request, just process it with storage
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
auto responses = storage->processRequest(
request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/);
for (const auto & response : responses)
@ -774,97 +804,97 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi
void KeeperStateMachine::shutdownStorage()
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
storage->finalize();
}
std::vector<int64_t> KeeperStateMachine::getDeadSessions()
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getDeadSessions();
}
int64_t KeeperStateMachine::getNextZxid() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getNextZXID();
}
KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getNodesDigest(false);
}
uint64_t KeeperStateMachine::getLastProcessedZxid() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getZXID();
}
uint64_t KeeperStateMachine::getNodesCount() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getNodesCount();
}
uint64_t KeeperStateMachine::getTotalWatchesCount() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getTotalWatchesCount();
}
uint64_t KeeperStateMachine::getWatchedPathsCount() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getWatchedPathsCount();
}
uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getSessionsWithWatchesCount();
}
uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getTotalEphemeralNodesCount();
}
uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getSessionWithEphemeralNodesCount();
}
void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
storage->dumpWatches(buf);
}
void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
storage->dumpWatchesByPath(buf);
}
void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
storage->dumpSessionsAndEphemerals(buf);
}
uint64_t KeeperStateMachine::getApproximateDataSize() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getApproximateDataSize();
}
uint64_t KeeperStateMachine::getKeyArenaSize() const
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getArenaDataSize();
}
@ -905,7 +935,7 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const
void KeeperStateMachine::recalculateStorageStats()
{
std::lock_guard lock(storage_and_responses_lock);
LockGuardWithStats lock(storage_and_responses_lock);
LOG_INFO(log, "Recalculating storage stats");
storage->recalculateStats();
LOG_INFO(log, "Done recalculating storage stats");

View File

@ -182,8 +182,7 @@ private:
KeeperSnapshotManagerS3 * snapshot_manager_s3;
KeeperStorage::ResponseForSession processReconfiguration(
const KeeperStorage::RequestForSession& request_for_session)
KeeperStorage::ResponseForSession processReconfiguration(const KeeperStorage::RequestForSession & request_for_session)
TSA_REQUIRES(storage_and_responses_lock);
};
}

View File

@ -40,6 +40,8 @@ namespace ProfileEvents
extern const Event KeeperGetRequest;
extern const Event KeeperListRequest;
extern const Event KeeperExistsRequest;
extern const Event KeeperPreprocessElapsedMicroseconds;
extern const Event KeeperProcessElapsedMicroseconds;
}
namespace DB
@ -2309,6 +2311,20 @@ void KeeperStorage::preprocessRequest(
std::optional<Digest> digest,
int64_t log_idx)
{
Stopwatch watch;
SCOPE_EXIT({
auto elapsed = watch.elapsedMicroseconds();
if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms)
{
LOG_INFO(
getLogger("KeeperStorage"),
"Preprocessing a request took too long ({}ms).\nRequest info: {}",
elapsed_ms,
zk_request->toString(/*short_format=*/true));
}
ProfileEvents::increment(ProfileEvents::KeeperPreprocessElapsedMicroseconds, elapsed);
});
if (!initialized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized");
@ -2409,6 +2425,20 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(
bool check_acl,
bool is_local)
{
Stopwatch watch;
SCOPE_EXIT({
auto elapsed = watch.elapsedMicroseconds();
if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms)
{
LOG_INFO(
getLogger("KeeperStorage"),
"Processing a request took too long ({}ms).\nRequest info: {}",
elapsed_ms,
zk_request->toString(/*short_format=*/true));
}
ProfileEvents::increment(ProfileEvents::KeeperProcessElapsedMicroseconds, elapsed);
});
if (!initialized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized");

View File

@ -3,6 +3,7 @@
#include <Common/HashTable/HashMap.h>
#include <Common/ArenaUtils.h>
#include <list>
namespace DB
{

View File

@ -623,7 +623,7 @@ class IColumn;
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \
@ -733,6 +733,7 @@ class IColumn;
M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \
M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \
M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \
M(Bool, database_replicated_allow_heavy_create, false, "Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine. Note that it can block DDL queue for a long time.", 0) \
M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \
M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \

View File

@ -59,8 +59,10 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{
{"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."},
{"allow_materialized_view_with_bad_select", true, false, "Stricter validation in CREATE MATERIALIZED VIEW"},
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"},
{"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"},
}},
{"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"},
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"},

View File

@ -32,7 +32,7 @@ namespace ErrorCodes
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
extern const int ILLEGAL_INDEX;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int LOGICAL_ERROR;
}
@ -286,7 +286,7 @@ std::optional<size_t> DataTypeTuple::tryGetPositionByName(const String & name) c
String DataTypeTuple::getNameByPosition(size_t i) const
{
if (i == 0 || i > names.size())
throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index of tuple element ({}) if out range ([1, {}])", i, names.size());
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index of tuple element ({}) is out range ([1, {}])", i, names.size());
return names[i - 1];
}

View File

@ -186,7 +186,7 @@ void IDisk::checkAccess()
DB::UUID server_uuid = DB::ServerUUID::get();
if (server_uuid == DB::UUIDHelpers::Nil)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Server UUID is not initialized");
const String path = fmt::format("clickhouse_access_check_{}", DB::toString(server_uuid));
const String path = fmt::format("clickhouse_access_check_{}", toString(server_uuid));
checkAccessImpl(path);
}

View File

@ -427,7 +427,7 @@ public:
/// Device: 10301h/66305d Inode: 3109907 Links: 1
/// Why we have always zero by default? Because normal filesystem
/// manages hardlinks by itself. So you can always remove hardlink and all
/// other alive harlinks will not be removed.
/// other alive hardlinks will not be removed.
virtual UInt32 getRefCount(const String &) const { return 0; }
/// Revision is an incremental counter of disk operation.

View File

@ -14,6 +14,15 @@ namespace ProfileEvents
{
extern const Event RemoteWriteThrottlerBytes;
extern const Event RemoteWriteThrottlerSleepMicroseconds;
extern const Event AzureUpload;
extern const Event AzureStageBlock;
extern const Event AzureCommitBlockList;
extern const Event DiskAzureUpload;
extern const Event DiskAzureStageBlock;
extern const Event DiskAzureCommitBlockList;
}
namespace DB
@ -134,6 +143,10 @@ void WriteBufferFromAzureBlobStorage::preFinalize()
/// then we use single part upload instead of multi part upload
if (block_ids.empty() && detached_part_data.size() == 1 && detached_part_data.front().data_size <= max_single_part_upload_size)
{
ProfileEvents::increment(ProfileEvents::AzureUpload);
if (blob_container_client->GetClickhouseOptions().IsClientForDisk)
ProfileEvents::increment(ProfileEvents::DiskAzureUpload);
auto part_data = std::move(detached_part_data.front());
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(part_data.memory.data()), part_data.data_size);
@ -164,6 +177,10 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl()
if (!block_ids.empty())
{
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
ProfileEvents::increment(ProfileEvents::AzureCommitBlockList);
if (blob_container_client->GetClickhouseOptions().IsClientForDisk)
ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList);
execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries);
LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path);
}
@ -269,6 +286,10 @@ void WriteBufferFromAzureBlobStorage::writePart(WriteBufferFromAzureBlobStorage:
auto & data_block_id = std::get<0>(*worker_data);
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
ProfileEvents::increment(ProfileEvents::AzureStageBlock);
if (blob_container_client->GetClickhouseOptions().IsClientForDisk)
ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock);
Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(std::get<1>(*worker_data).memory.data()), data_size);
execWithRetry([&](){ block_blob_client.StageBlock(data_block_id, memory_stream); }, max_unexpected_write_error_retries, data_size);
};

View File

@ -60,7 +60,6 @@ public:
"ListObjectAzure")
, client(client_)
{
options.Prefix = path_prefix;
options.PageSizeHint = static_cast<int>(max_list_size);
}
@ -150,7 +149,7 @@ ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_pr
auto settings_ptr = settings.get();
auto client_ptr = client.get();
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, max_keys);
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, max_keys ? max_keys : settings_ptr->list_object_keys_size);
}
void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const

View File

@ -75,6 +75,7 @@ struct RelativePathWithMetadata
virtual std::string getPath() const { return relative_path; }
virtual bool isArchive() const { return false; }
virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
};
struct ObjectKeyWithMetadata

View File

@ -22,8 +22,7 @@ using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFile
/// Also it has excessive API calls.
///
/// It is used to allow BACKUP/RESTORE to ObjectStorage (S3/...) with the same
/// structure as on disk MergeTree, and does not requires metadata from local
/// disk to restore.
/// structure as on disk MergeTree, and does not require metadata from a local disk to restore.
class MetadataStorageFromPlainObjectStorage : public IMetadataStorage
{
public:

View File

@ -1,10 +1,14 @@
#include <Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h>
#include <Disks/ObjectStorages/ObjectStorageIterator.h>
#include <IO/ReadHelpers.h>
#include <IO/SharedThreadPools.h>
#include <IO/S3Common.h>
#include <Common/ErrorCodes.h>
#include <Common/logger_useful.h>
#include "CommonPathPrefixKeyGenerator.h"
namespace DB
{
@ -22,34 +26,78 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
{
MetadataStorageFromPlainObjectStorage::PathMap result;
RelativePathsWithMetadata files;
object_storage->listObjects(root, files, 0);
for (const auto & file : files)
ThreadPool & pool = getIOThreadPool().get();
ThreadPoolCallbackRunnerLocal<void> runner(pool, "PlainRWMetaLoad");
std::mutex mutex;
LoggerPtr log = getLogger("MetadataStorageFromPlainObjectStorage");
ReadSettings settings;
settings.enable_filesystem_cache = false;
settings.remote_fs_method = RemoteFSReadMethod::read;
settings.remote_fs_buffer_size = 1024; /// These files are small.
LOG_DEBUG(log, "Loading metadata");
size_t num_files = 0;
for (auto iterator = object_storage->iterate(root, 0); iterator->isValid(); iterator->next())
{
auto remote_path = std::filesystem::path(file->relative_path);
++num_files;
auto file = iterator->current();
String path = file->getPath();
auto remote_path = std::filesystem::path(path);
if (remote_path.filename() != PREFIX_PATH_FILE_NAME)
continue;
StoredObject object{file->relative_path};
runner([remote_path, path, &object_storage, &result, &mutex, &log, &settings]
{
setThreadName("PlainRWMetaLoad");
auto read_buf = object_storage->readObject(object);
StoredObject object{path};
String local_path;
try
{
auto read_buf = object_storage->readObject(object, settings);
readStringUntilEOF(local_path, *read_buf);
}
#if USE_AWS_S3
catch (const S3Exception & e)
{
/// It is ok if a directory was removed just now.
/// We support attaching a filesystem that is concurrently modified by someone else.
if (e.getS3ErrorCode() == Aws::S3::S3Errors::NO_SUCH_KEY)
return;
throw;
}
#endif
catch (...)
{
throw;
}
chassert(remote_path.has_parent_path());
auto res = result.emplace(local_path, remote_path.parent_path());
std::pair<MetadataStorageFromPlainObjectStorage::PathMap::iterator, bool> res;
{
std::lock_guard lock(mutex);
res = result.emplace(local_path, remote_path.parent_path());
}
/// This can happen if table replication is enabled, then the same local path is written
/// in `prefix.path` of each replica.
/// TODO: should replicated tables (e.g., RMT) be explicitly disallowed?
if (!res.second)
LOG_WARNING(
getLogger("MetadataStorageFromPlainObjectStorage"),
log,
"The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'",
local_path,
res.first->second,
remote_path.parent_path().string());
});
}
runner.waitForAllToFinishAndRethrowFirstError();
LOG_DEBUG(log, "Loaded metadata for {} files, found {} directories", num_files, result.size());
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::add(metric, result.size());
return result;

View File

@ -4,6 +4,7 @@
#include <memory>
namespace DB
{

View File

@ -9,15 +9,34 @@ namespace DB
class IObjectStorageIterator
{
public:
/// Moves the iterator to the next element. If the iterator is not valid (see `isValid`), the behavior is undefined.
virtual void next() = 0;
virtual void nextBatch() = 0;
/// Check if the iterator is valid, which means the `current` method can be called.
virtual bool isValid() = 0;
/// Return the current element.
virtual RelativePathWithMetadataPtr current() = 0;
virtual RelativePathsWithMetadata currentBatch() = 0;
/// This will initiate prefetching the next batch in background, so it can be obtained faster when needed.
virtual std::optional<RelativePathsWithMetadata> getCurrentBatchAndScheduleNext() = 0;
/// Returns the number of elements in the batches that were fetched so far.
virtual size_t getAccumulatedSize() const = 0;
virtual ~IObjectStorageIterator() = default;
private:
/// Skips all the remaining elements in the current batch (if any),
/// and moves the iterator to the first element of the next batch,
/// or, if there are no more batches, the iterator becomes invalid.
/// If the iterator is not valid (see `isValid`), the behavior is undefined.
virtual void nextBatch() = 0;
/// Return the current batch of elements.
/// It is unspecified how batches are formed.
/// But this method can be used for more efficient processing.
virtual RelativePathsWithMetadata currentBatch() = 0;
};
using ObjectStorageIteratorPtr = std::shared_ptr<IObjectStorageIterator>;
@ -25,6 +44,7 @@ using ObjectStorageIteratorPtr = std::shared_ptr<IObjectStorageIterator>;
class ObjectStorageIteratorFromList : public IObjectStorageIterator
{
public:
/// Everything is represented by just a single batch.
explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_)
: batch(std::move(batch_))
, batch_iterator(batch.begin()) {}

View File

@ -37,10 +37,11 @@ void IObjectStorageIteratorAsync::nextBatch()
{
std::lock_guard lock(mutex);
if (is_finished)
if (!has_next_batch)
{
current_batch.clear();
current_batch_iterator = current_batch.begin();
is_finished = true;
return;
}
@ -58,16 +59,23 @@ void IObjectStorageIteratorAsync::nextBatch()
current_batch = std::move(result.batch);
current_batch_iterator = current_batch.begin();
if (current_batch.empty())
{
is_finished = true;
has_next_batch = false;
}
else
{
accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
if (result.has_next)
has_next_batch = result.has_next;
if (has_next_batch)
outcome_future = scheduleBatch();
else
is_finished = true;
}
}
catch (...)
{
is_finished = true;
has_next_batch = false;
throw;
}
}
@ -76,10 +84,12 @@ void IObjectStorageIteratorAsync::next()
{
std::lock_guard lock(mutex);
if (is_finished)
return;
++current_batch_iterator;
if (current_batch_iterator == current_batch.end())
nextBatch();
else
++current_batch_iterator;
}
std::future<IObjectStorageIteratorAsync::BatchAndHasNext> IObjectStorageIteratorAsync::scheduleBatch()
@ -99,7 +109,7 @@ bool IObjectStorageIteratorAsync::isValid()
if (!is_initialized)
nextBatch();
return current_batch_iterator != current_batch.end();
return !is_finished;
}
RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current()

View File

@ -35,7 +35,7 @@ public:
void deactivate();
protected:
/// This method fetches the next batch, and returns true if there are more batches after it.
virtual bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) = 0;
struct BatchAndHasNext
@ -48,6 +48,7 @@ protected:
bool is_initialized{false};
bool is_finished{false};
bool has_next_batch{true};
bool deactivated{false};
mutable std::recursive_mutex mutex;

View File

@ -293,6 +293,8 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
/// Builds an `S3IteratorAsync` over this storage's bucket for keys starting with `path_prefix`.
/// A zero `max_keys` is replaced with `list_object_keys_size` taken from the current S3 settings.
ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
{
    auto current_settings = s3_settings.get();

    /// Zero is treated as "no explicit limit given by the caller".
    size_t keys_limit = max_keys;
    if (keys_limit == 0)
        keys_limit = current_settings->list_object_keys_size;

    return std::make_shared<S3IteratorAsync>(uri.bucket, path_prefix, client.get(), keys_limit);
}

View File

@ -67,7 +67,7 @@ private:
}
public:
template <class ...Args>
template <typename... Args>
explicit S3ObjectStorage(std::unique_ptr<S3::Client> && client_, Args && ...args)
: S3ObjectStorage("S3ObjectStorage", std::move(client_), std::forward<Args>(args)...)
{

View File

@ -202,7 +202,7 @@ public:
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_arguments);
validateFunctionArguments(*this, arguments, mandatory_arguments);
return std::make_shared<DataTypeString>();
}

View File

@ -16,6 +16,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int PARAMETER_OUT_OF_BOUND;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
}
@ -146,6 +147,9 @@ private:
const auto pos = pos_col_const->getUInt(0);
if (pos < 8 * sizeof(ValueType))
mask = mask | (ValueType(1) << pos);
else
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND,
"The bit position argument {} is out of bounds for number", static_cast<UInt64>(pos));
}
else
{
@ -186,13 +190,20 @@ private:
for (const auto i : collections::range(0, mask.size()))
if (pos[i] < 8 * sizeof(ValueType))
mask[i] = mask[i] | (ValueType(1) << pos[i]);
else
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND,
"The bit position argument {} is out of bounds for number", static_cast<UInt64>(pos[i]));
return true;
}
else if (const auto pos_col_const = checkAndGetColumnConst<ColumnVector<PosType>>(pos_col_untyped))
{
const auto & pos = pos_col_const->template getValue<PosType>();
const auto new_mask = pos < 8 * sizeof(ValueType) ? ValueType(1) << pos : 0;
if (pos >= 8 * sizeof(ValueType))
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND,
"The bit position argument {} is out of bounds for number", static_cast<UInt64>(pos));
const auto new_mask = ValueType(1) << pos;
for (const auto i : collections::range(0, mask.size()))
mask[i] = mask[i] | new_mask;

View File

@ -95,22 +95,21 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName
return res;
}
void validateArgumentType(const IFunction & func, const DataTypes & arguments,
size_t argument_index, bool (* validator_func)(const IDataType &),
const char * expected_type_description)
{
if (arguments.size() <= argument_index)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}",
func.getName());
const auto & argument = arguments[argument_index];
if (!validator_func(*argument))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}",
argument->getName(), std::to_string(argument_index), func.getName(), expected_type_description);
}
namespace
{
/// Returns the English ordinal ("1st", "2nd", "3rd", "4th", ...) of the position that corresponds
/// to the 0-based index `i`, i.e. index 0 yields "1st".
String withOrdinalEnding(size_t i)
{
    /// The index is 0-based, the rendered position is 1-based.
    const size_t position = i + 1;
    const String number = std::to_string(position);

    /// 11, 12 and 13 (also 111, 112, ...) take "th" even though they end in 1, 2 or 3.
    const size_t last_two_digits = position % 100;
    if (last_two_digits >= 11 && last_two_digits <= 13)
        return number + "th";

    switch (position % 10)
    {
        case 1: return number + "st";
        case 2: return number + "nd";
        case 3: return number + "rd";
        default: return number + "th";
    }
}
void validateArgumentsImpl(const IFunction & func,
const ColumnsWithTypeAndName & arguments,
size_t argument_offset,
@ -120,20 +119,18 @@ void validateArgumentsImpl(const IFunction & func,
{
const auto argument_index = i + argument_offset;
if (argument_index >= arguments.size())
{
break;
}
const auto & arg = arguments[i + argument_offset];
const auto & descriptor = descriptors[i];
if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0)
throw Exception(error_code,
"Illegal type of argument #{}{} of function {}{}{}",
argument_offset + i + 1, // +1 is for human-friendly 1-based indexing
(descriptor.argument_name ? " '" + std::string(descriptor.argument_name) + "'" : String{}),
"A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}",
withOrdinalEnding(argument_offset + i),
descriptor.name,
func.getName(),
(descriptor.expected_type_description ? String(", expected ") + descriptor.expected_type_description : String{}),
(arg.type ? ", got " + arg.type->getName() : String{}));
descriptor.type_name,
arg.type ? arg.type->getName() : "<?>");
}
}
@ -141,52 +138,42 @@ void validateArgumentsImpl(const IFunction & func,
int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const ColumnPtr & column) const
{
if (type_validator_func && (data_type == nullptr || !type_validator_func(*data_type)))
if (name.empty() || type_name.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "name or type_name are not set");
if (type_validator && (data_type == nullptr || !type_validator(*data_type)))
return ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT;
if (column_validator_func && (column == nullptr || !column_validator_func(*column)))
if (column_validator && (column == nullptr || !column_validator(*column)))
return ErrorCodes::ILLEGAL_COLUMN;
return 0;
}
void validateFunctionArgumentTypes(const IFunction & func,
void validateFunctionArguments(const IFunction & func,
const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args)
{
if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size())
{
auto join_argument_types = [](const auto & args, const String sep = ", ")
{
String result;
for (const auto & a : args)
{
using A = std::decay_t<decltype(a)>;
if constexpr (std::is_same_v<A, FunctionArgumentDescriptor>)
{
if (a.argument_name)
result += "'" + std::string(a.argument_name) + "' : ";
if (a.expected_type_description)
result += a.expected_type_description;
}
else if constexpr (std::is_same_v<A, ColumnWithTypeAndName>)
result += a.type->getName();
auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; };
result += sep;
}
if (!args.empty())
result.erase(result.end() - sep.length(), result.end());
return result;
};
String expected_args_string;
if (!mandatory_args.empty() && !optional_args.empty())
expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args));
else if (!mandatory_args.empty() && optional_args.empty())
expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments"
else if (mandatory_args.empty() && !optional_args.empty())
expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args));
else
expected_args_string = "0 arguments";
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Incorrect number of arguments for function {} provided {}{}, expected {}{} ({}{})",
func.getName(), arguments.size(), (!arguments.empty() ? " (" + join_argument_types(arguments) + ")" : String{}),
mandatory_args.size(), (!optional_args.empty() ? " to " + std::to_string(mandatory_args.size() + optional_args.size()) : ""),
join_argument_types(mandatory_args), (!optional_args.empty() ? ", [" + join_argument_types(optional_args) + "]" : ""));
"An incorrect number of arguments was specified for function '{}'. Expected {}, got {}",
func.getName(),
expected_args_string,
fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments)));
}
validateArgumentsImpl(func, arguments, 0, mandatory_args);

View File

@ -115,67 +115,49 @@ ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col);
/// column if it is nullable.
ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns);
/// Checks argument type at specified index with predicate.
/// throws if there is no argument at specified index or if predicate returns false.
void validateArgumentType(const IFunction & func, const DataTypes & arguments,
size_t argument_index, bool (* validator_func)(const IDataType &),
const char * expected_type_description);
/** Simple validator that is used in conjunction with validateFunctionArgumentTypes() to check if function arguments are as expected
*
* Also it is used to generate function description when arguments do not match expected ones.
* Any field can be null:
* `argument_name` - if not null, reported via type check errors.
* `expected_type_description` - if not null, reported via type check errors.
* `type_validator_func` - if not null, used to validate data type of function argument.
* `column_validator_func` - if not null, used to validate column of function argument.
*/
/// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided
/// arguments match the expected arguments.
struct FunctionArgumentDescriptor
{
const char * argument_name;
/// The argument name, e.g. "longitude".
/// Should not be empty.
std::string_view name;
/// A function which validates the argument data type.
/// May be nullptr.
using TypeValidator = bool (*)(const IDataType &);
TypeValidator type_validator_func;
TypeValidator type_validator;
/// A function which validates the argument column.
/// May be nullptr.
using ColumnValidator = bool (*)(const IColumn &);
ColumnValidator column_validator_func;
ColumnValidator column_validator;
const char * expected_type_description;
/// The expected argument type, e.g. "const String" or "UInt64".
/// Should not be empty.
std::string_view type_name;
/** Validate argument type and column.
*
* Returns non-zero error code if:
* Validator != nullptr && (Value == nullptr || Validator(*Value) == false)
* For:
* Validator is either `type_validator_func` or `column_validator_func`
* Value is either `data_type` or `column` respectively.
* ILLEGAL_TYPE_OF_ARGUMENT if type validation fails
*
*/
/// Validate argument type and column.
int isValid(const DataTypePtr & data_type, const ColumnPtr & column) const;
};
using FunctionArgumentDescriptors = std::vector<FunctionArgumentDescriptor>;
/** Validate that function arguments match specification.
*
* Designed to simplify argument validation for functions with variable arguments
* (e.g. depending on result type or other trait).
* First, checks that number of arguments is as expected (including optional arguments).
* Second, checks that mandatory args present and have valid type.
* Third, checks optional arguments types, skipping ones that are missing.
*
* Please note that if you have several optional arguments, like f([a, b, c]),
* only these calls are considered valid:
* f(a)
* f(a, b)
* f(a, b, c)
*
* But NOT these: f(a, c), f(b, c)
* In other words you can't omit middle optional arguments (just like in regular C++).
*
* If any mandatory arg is missing, throw an exception, with explicit description of expected arguments.
*/
void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithTypeAndName & arguments,
/// Validates that the user-provided arguments match the expected arguments.
///
/// Checks that
/// - the number of provided arguments matches the number of mandatory/optional arguments,
/// - all mandatory arguments are present and have the right type,
/// - optional arguments - if present - have the right type.
///
/// With multiple optional arguments, e.g. f([a, b, c]), provided arguments must match left-to-right. E.g. these calls are considered valid:
/// f(a)
/// f(a, b)
/// f(a, b, c)
/// but these are NOT:
/// f(a, c)
/// f(b, c)
void validateFunctionArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args = {});
@ -183,9 +165,8 @@ void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithType
std::pair<std::vector<const IColumn *>, const ColumnArray::Offset *>
checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
/** Return ColumnNullable of src, with null map as OR-ed null maps of args columns.
* Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
*/
/// Return ColumnNullable of src, with null map as OR-ed null maps of args columns.
/// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count);
struct NullPresence

View File

@ -5,7 +5,7 @@
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/HashJoin.h>
#include <Interpreters/HashJoin/HashJoin.h>
#include <Storages/StorageJoin.h>
#include <Storages/TableLockHolder.h>

View File

@ -40,7 +40,7 @@ public:
{"replacement", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeString>();
}

View File

@ -194,7 +194,7 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings(
{"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
validateFunctionArguments(func, arguments, mandatory_args, optional_args);
}
static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments)
@ -207,7 +207,7 @@ static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & fun
{"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
validateFunctionArguments(func, arguments, mandatory_args, optional_args);
}
}

View File

@ -47,7 +47,7 @@ public:
FunctionArgumentDescriptors args{
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateTime64), nullptr, "DateTime64"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeInt64>();
}

View File

@ -165,7 +165,7 @@ private:
});
}
validateFunctionArgumentTypes(*this, arguments,
validateFunctionArguments(*this, arguments,
FunctionArgumentDescriptors{
{"mode", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "encryption mode string"},
{"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "plaintext"},
@ -438,7 +438,7 @@ private:
});
}
validateFunctionArgumentTypes(*this, arguments,
validateFunctionArguments(*this, arguments,
FunctionArgumentDescriptors{
{"mode", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "decryption mode string"},
{"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "ciphertext"},

View File

@ -2020,7 +2020,7 @@ public:
DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const
{
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}};
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}};
FunctionArgumentDescriptors optional_args;
if constexpr (to_decimal)
@ -2049,7 +2049,7 @@ public:
optional_args.push_back({"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"});
}
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
if constexpr (std::is_same_v<ToDataType, DataTypeInterval>)
{
@ -2390,7 +2390,7 @@ public:
if (isDateTime64<Name, ToDataType>(arguments))
{
validateFunctionArgumentTypes(*this, arguments,
validateFunctionArguments(*this, arguments,
FunctionArgumentDescriptors{{"string", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}},
// optional
FunctionArgumentDescriptors{

View File

@ -518,37 +518,41 @@ struct Dispatcher
template <typename ScaleType>
static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr)
{
const auto & value_col_typed = checkAndGetColumn<ColumnVector<T>>(*value_col);
// Non-const value argument:
const auto * value_col_typed = checkAndGetColumn<ColumnVector<T>>(value_col);
if (value_col_typed)
{
auto col_res = ColumnVector<T>::create();
typename ColumnVector<T>::Container & vec_res = col_res->getData();
vec_res.resize(value_col_typed.getData().size());
vec_res.resize(value_col_typed->getData().size());
if (!vec_res.empty())
{
// Const scale argument:
if (scale_col == nullptr || isColumnConst(*scale_col))
{
auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
if (scale_arg == 0)
{
size_t scale = 1;
FunctionRoundingImpl<ScaleMode::Zero>::apply(value_col_typed.getData(), scale, vec_res);
FunctionRoundingImpl<ScaleMode::Zero>::apply(value_col_typed->getData(), scale, vec_res);
}
else if (scale_arg > 0)
{
size_t scale = intExp10(scale_arg);
FunctionRoundingImpl<ScaleMode::Positive>::apply(value_col_typed.getData(), scale, vec_res);
FunctionRoundingImpl<ScaleMode::Positive>::apply(value_col_typed->getData(), scale, vec_res);
}
else
{
size_t scale = intExp10(-scale_arg);
FunctionRoundingImpl<ScaleMode::Negative>::apply(value_col_typed.getData(), scale, vec_res);
FunctionRoundingImpl<ScaleMode::Negative>::apply(value_col_typed->getData(), scale, vec_res);
}
}
/// Non-const scale argument:
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & value_data = value_col_typed.getData();
const auto & value_data = value_col_typed->getData();
const auto & scale_data = scale_col_typed->getData();
const size_t rows = value_data.size();
@ -576,9 +580,17 @@ struct Dispatcher
}
}
}
return col_res;
}
// Const value argument:
const auto * value_col_typed_const = checkAndGetColumnConst<ColumnVector<T>>(value_col);
if (value_col_typed_const)
{
auto value_col_full = value_col_typed_const->convertToFullColumn();
return apply<ScaleType>(value_col_full.get(), scale_col);
}
return nullptr;
}
};
/// Dispatcher for Decimal inputs
@ -589,20 +601,25 @@ public:
template <typename ScaleType>
static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr)
{
const auto & value_col_typed = checkAndGetColumn<ColumnDecimal<T>>(*value_col);
const typename ColumnDecimal<T>::Container & vec_src = value_col_typed.getData();
// Non-const value argument:
const auto * value_col_typed = checkAndGetColumn<ColumnDecimal<T>>(value_col);
if (value_col_typed)
{
const typename ColumnDecimal<T>::Container & vec_src = value_col_typed->getData();
auto col_res = ColumnDecimal<T>::create(vec_src.size(), value_col_typed.getScale());
auto col_res = ColumnDecimal<T>::create(vec_src.size(), value_col_typed->getScale());
auto & vec_res = col_res->getData();
vec_res.resize(vec_src.size());
if (!vec_res.empty())
{
/// Const scale argument:
if (scale_col == nullptr || isColumnConst(*scale_col))
{
auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(value_col_typed.getData(), value_col_typed.getScale(), vec_res, scale_arg);
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(vec_src, value_col_typed->getScale(), vec_res, scale_arg);
}
/// Non-const scale argument
/// Non-const scale argument:
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & scale = scale_col_typed->getData();
@ -614,7 +631,7 @@ public:
validateScale(scale64);
Scale raw_scale = scale64;
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::applyOne(value_col_typed.getElement(i), value_col_typed.getScale(),
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(),
reinterpret_cast<ColumnDecimal<T>::NativeT&>(col_res->getElement(i)), raw_scale);
}
}
@ -622,6 +639,15 @@ public:
return col_res;
}
// Const value argument:
const auto * value_col_typed_const = checkAndGetColumnConst<ColumnDecimal<T>>(value_col);
if (value_col_typed_const)
{
auto value_col_full = value_col_typed_const->convertToFullColumn();
return apply<ScaleType>(value_col_full.get(), scale_col);
}
return nullptr;
}
};
/// Functions that round the value of an input parameter of type (U)Int8/16/32/64, Float32/64 or Decimal32/64/128.
@ -647,7 +673,7 @@ public:
FunctionArgumentDescriptors optional_args{
{"N", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), nullptr, "The number of decimal places to round to"},
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return arguments[0].type;
}
@ -671,9 +697,6 @@ public:
using ScaleTypes = std::decay_t<decltype(scaleTypes)>;
using ScaleType = typename ScaleTypes::RightType;
if (isColumnConst(*value_arg.column) && !isColumnConst(*scale_column.column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale column must be const for const data column");
res = Dispatcher<DataType, rounding_mode, tie_breaking_mode>::template apply<ScaleType>(value_arg.column.get(), scale_column.column.get());
return true;
};

View File

@ -48,7 +48,7 @@ namespace
{"json", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>());
}

View File

@ -32,7 +32,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
validateFunctionArguments(func, arguments, mandatory_args);
}
static constexpr auto strings_argument_position = 0uz;

View File

@ -30,7 +30,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
validateFunctionArguments(func, arguments, mandatory_args);
}
static constexpr auto strings_argument_position = 0uz;

View File

@ -30,7 +30,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
validateFunctionArguments(func, arguments, mandatory_args);
}
static constexpr auto strings_argument_position = 0uz;

View File

@ -31,7 +31,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
validateFunctionArguments(func, arguments, mandatory_args);
}
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}

View File

@ -51,6 +51,8 @@ public:
bool isVariadic() const override { return impl.isVariadic(); }
size_t getNumberOfArguments() const override { return impl.getNumberOfArguments(); }
bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); }
bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns(); }
bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants(); }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; }
@ -184,7 +186,7 @@ struct MapToNestedAdapter : public MapAdapterBase<MapToNestedAdapter<Name, retur
template <typename Name, size_t position>
struct MapToSubcolumnAdapter
{
static_assert(position <= 1);
static_assert(position <= 1, "position of Map subcolumn must be 0 or 1");
static void extractNestedTypes(DataTypes & types)
{
@ -357,7 +359,7 @@ struct NameMapValues { static constexpr auto name = "mapValues"; };
using FunctionMapValues = FunctionMapToArrayAdapter<FunctionIdentity, MapToSubcolumnAdapter<NameMapValues, 1>, NameMapValues>;
struct NameMapContains { static constexpr auto name = "mapContains"; };
using FunctionMapContains = FunctionMapToArrayAdapter<FunctionArrayIndex<HasAction, NameMapContains>, MapToSubcolumnAdapter<NameMapKeys, 0>, NameMapContains>;
using FunctionMapContains = FunctionMapToArrayAdapter<FunctionArrayIndex<HasAction, NameMapContains>, MapToSubcolumnAdapter<NameMapContains, 0>, NameMapContains>;
struct NameMapFilter { static constexpr auto name = "mapFilter"; };
using FunctionMapFilter = FunctionMapToArrayAdapter<FunctionArrayFilter, MapToNestedAdapter<NameMapFilter>, NameMapFilter>;

View File

@ -87,7 +87,7 @@ public:
{"array_1", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"array_2", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeNumber<ResultType>>();
}

View File

@ -39,7 +39,7 @@ public:
{"array", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"samples", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isUInt), isColumnConst, "const UInt*"},
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
// Return an array with the same nested type as the input array
const DataTypePtr & array_type = arguments[0].type;

View File

@ -31,7 +31,7 @@ public:
{"array", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"length", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isInteger), nullptr, "Integer"}
};
validateFunctionArgumentTypes(*this, arguments, args);
validateFunctionArguments(*this, arguments, args);
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].type.get());
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(array_type->getNestedType()));

View File

@ -159,7 +159,7 @@ public:
{"separator", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"},
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeString>();
}

View File

@ -8,6 +8,7 @@ namespace DB
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int PARAMETER_OUT_OF_BOUND;
}
namespace
@ -21,12 +22,21 @@ struct BitTestImpl
static const constexpr bool allow_string_integer = false;
template <typename Result = ResultType>
NO_SANITIZE_UNDEFINED static Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
static Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument");
else
return (typename NumberTraits::ToInteger<A>::Type(a) >> typename NumberTraits::ToInteger<B>::Type(b)) & 1;
{
typename NumberTraits::ToInteger<A>::Type a_int = a;
typename NumberTraits::ToInteger<B>::Type b_int = b;
const auto max_position = static_cast<decltype(b)>((8 * sizeof(a)) - 1);
if (b_int > max_position || b_int < 0)
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND,
"The bit position argument needs to a positive value and less or equal to {} for integer {}",
std::to_string(max_position), std::to_string(a_int));
return (a_int >> b_int) & 1;
}
}
#if USE_EMBEDDED_COMPILER

View File

@ -203,7 +203,7 @@ private:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}};
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}};
FunctionArgumentDescriptors optional_args;
if (isDecimal(type) || isDateTime64(type))
@ -212,9 +212,9 @@ private:
if (isDateTimeOrDateTime64(type))
optional_args.push_back({"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"});
optional_args.push_back({"default_value", nullptr, nullptr, nullptr});
optional_args.push_back({"default_value", nullptr, nullptr, "any type"});
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
size_t additional_argument_index = 1;

View File

@ -46,24 +46,29 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() < 2)
throw Exception(
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
"Number of arguments for function {} doesn't match: passed {}, should be at least 2",
getName(),
arguments.size());
if (arguments.size() == 1)
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Number of arguments for function {} should not be 1", getName());
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
if (arguments.empty())
{
auto res_data = ColumnString::create();
res_data->insertDefault();
return ColumnConst::create(std::move(res_data), input_rows_count);
}
else if (arguments.size() == 1)
return arguments[0].column;
/// Format function is not proven to be faster for two arguments.
/// Actually there is overhead of 2 to 5 extra instructions for each string for checking empty strings in FormatImpl.
/// Though, benchmarks are really close, for most examples we saw executeBinary is slightly faster (0-3%).
/// For 3 and more arguments FormatStringImpl is much faster (up to 50-60%).
if (arguments.size() == 2)
else if (arguments.size() == 2)
return executeBinary(arguments, input_rows_count);
else
return executeFormatImpl(arguments, input_rows_count);
}
@ -209,11 +214,11 @@ public:
{
if (arguments.size() == 1)
return FunctionFactory::instance().getImpl("toString", context)->build(arguments);
if (std::ranges::all_of(arguments, [](const auto & elem) { return isArray(elem.type); }))
if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isArray(elem.type); }))
return FunctionFactory::instance().getImpl("arrayConcat", context)->build(arguments);
if (std::ranges::all_of(arguments, [](const auto & elem) { return isMap(elem.type); }))
if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isMap(elem.type); }))
return FunctionFactory::instance().getImpl("mapConcat", context)->build(arguments);
if (std::ranges::all_of(arguments, [](const auto & elem) { return isTuple(elem.type); }))
if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isTuple(elem.type); }))
return FunctionFactory::instance().getImpl("tupleConcat", context)->build(arguments);
return std::make_unique<FunctionToFunctionBaseAdaptor>(
FunctionConcat::create(context),
@ -221,15 +226,8 @@ public:
return_type);
}
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
DataTypePtr getReturnTypeImpl(const DataTypes &) const override
{
if (arguments.empty())
throw Exception(
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
"Number of arguments for function {} doesn't match: passed {}, should be at least 1.",
getName(),
arguments.size());
/// We always return Strings from concat, even if arguments were fixed strings.
return std::make_shared<DataTypeString>();
}

Some files were not shown because too many files have changed in this diff Show More