Merge remote-tracking branch 'origin/master' into mv4

This commit is contained in:
Michael Kolupaev 2024-07-04 08:28:59 +00:00
commit ba5e50cedc
292 changed files with 7460 additions and 5670 deletions

1
.gitattributes vendored
View File

@ -2,3 +2,4 @@ contrib/* linguist-vendored
*.h linguist-language=C++ *.h linguist-language=C++
tests/queries/0_stateless/data_json/* binary tests/queries/0_stateless/data_json/* binary
tests/queries/0_stateless/*.reference -crlf tests/queries/0_stateless/*.reference -crlf
src/Core/SettingsChangesHistory.cpp merge=union

View File

@ -13,5 +13,4 @@ rules:
level: warning level: warning
comments: comments:
min-spaces-from-content: 1 min-spaces-from-content: 1
document-start: document-start: disable
present: false

View File

@ -34,7 +34,7 @@
* Add `_time` virtual column to file alike storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)). * Add `_time` virtual column to file alike storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)).
* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)). * Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)). * Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)).
* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)). * Add `http_response_headers` configuration to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)).
* Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)). This is useful for testing. * Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)). This is useful for testing.
* Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)). * Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)).
* Added a setting `output_format_pretty_display_footer_column_names` which when enabled displays column names at the end of the table for long tables (50 rows by default), with the threshold value for minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)). * Added a setting `output_format_pretty_display_footer_column_names` which when enabled displays column names at the end of the table for long tables (50 rows by default), with the threshold value for minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)).

View File

@ -1,32 +1,3 @@
// Based on https://github.com/amdn/itoa and combined with our optimizations
//
//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-//
//
// The MIT License (MIT)
// Copyright (c) 2016 Arturo Martin-de-Nicolas
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//===----------------------------------------------------------------------===//
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits> #include <type_traits>
#include <base/defines.h> #include <base/defines.h>
#include <base/extended_types.h> #include <base/extended_types.h>
@ -34,99 +5,15 @@
namespace namespace
{ {
template <typename T> ALWAYS_INLINE inline char * outOneDigit(char * p, uint8_t value)
ALWAYS_INLINE inline constexpr T pow10(size_t x)
{
return x ? 10 * pow10<T>(x - 1) : 1;
}
// Division by a power of 10 is implemented using a multiplicative inverse.
// This strength reduction is also done by optimizing compilers, but
// presently the fastest results are produced by using the values
// for the multiplication and the shift as given by the algorithm
// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
//
// http://www.agner.org/optimize/optimizing_assembly.pdf
//
// "Integer division by a constant (all processors)
// A floating point number can be divided by a constant by multiplying
// with the reciprocal. If we want to do the same with integers, we have
// to scale the reciprocal by 2n and then shift the product to the right
// by n. There are various algorithms for finding a suitable value of n
// and compensating for rounding errors. The algorithm described below
// was invented by Terje Mathisen, Norway, and not published elsewhere."
/// Division by constant is performed by:
/// 1. Adding 1 if needed;
/// 2. Multiplying by another constant;
/// 3. Shifting right by another constant.
template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
struct Division
{
static constexpr bool add{add_};
static constexpr UInt multiplier{multiplier_};
static constexpr unsigned shift{shift_};
};
/// Select a type with appropriate number of bytes from the list of types.
/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
template <size_t N, typename T, typename... Ts>
struct SelectType
{
using Result = typename SelectType<N / 2, Ts...>::Result;
};
template <typename T, typename... Ts>
struct SelectType<1, T, Ts...>
{
using Result = T;
};
/// Division by 10^N where N is the size of the type.
template <size_t N>
using DivisionBy10PowN = typename SelectType<
N,
Division<uint8_t, false, 205U, 11>, /// divide by 10
Division<uint16_t, true, 41943U, 22>, /// divide by 100
Division<uint32_t, false, 3518437209U, 45>, /// divide by 10000
Division<uint64_t, false, 12379400392853802749ULL, 90> /// divide by 100000000
>::Result;
template <size_t N>
using UnsignedOfSize = typename SelectType<N, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>::Result;
/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
template <size_t N>
struct QuotientAndRemainder
{
UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
};
template <size_t N>
QuotientAndRemainder<N> inline split(UnsignedOfSize<N> value)
{
constexpr DivisionBy10PowN<N> division;
UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
return {quotient, remainder};
}
ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value)
{ {
*p = '0' + value; *p = '0' + value;
++p; return p + 1;
return p;
} }
// Using a lookup table to convert binary numbers from 0 to 99 // Using a lookup table to convert binary numbers from 0 to 99
// into ascii characters as described by Andrei Alexandrescu in // into ascii characters as described by Andrei Alexandrescu in
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ // https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
const char digits[201] = "00010203040506070809" const char digits[201] = "00010203040506070809"
"10111213141516171819" "10111213141516171819"
"20212223242526272829" "20212223242526272829"
@ -137,7 +24,6 @@ const char digits[201] = "00010203040506070809"
"70717273747576777879" "70717273747576777879"
"80818283848586878889" "80818283848586878889"
"90919293949596979899"; "90919293949596979899";
ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
{ {
memcpy(p, &digits[value * 2], 2); memcpy(p, &digits[value * 2], 2);
@ -145,153 +31,260 @@ ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
return p; return p;
} }
namespace convert namespace jeaiii
{ {
template <typename UInt, size_t N = sizeof(UInt)> /*
char * head(char * p, UInt u); MIT License
template <typename UInt, size_t N = sizeof(UInt)>
char * tail(char * p, UInt u);
//===----------------------------------------------------------===// Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa
// head: find most significant digit, skip leading zeros
//===----------------------------------------------------------===//
// "x" contains quotient and remainder after division by 10^N Permission is hereby granted, free of charge, to any person obtaining a copy
// quotient is less than 10^N of this software and associated documentation files (the "Software"), to deal
template <size_t N> in the Software without restriction, including without limitation the rights
ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder<N> x) to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
struct pair
{ {
p = head(p, UnsignedOfSize<N / 2>(x.quotient)); char dd[2];
p = tail(p, x.remainder); constexpr pair(char c) : dd{c, '\0'} { } /// NOLINT(google-explicit-constructor)
return p; constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } /// NOLINT(google-explicit-constructor)
} };
// "u" is less than 10^2*N constexpr struct
template <typename UInt, size_t N>
ALWAYS_INLINE inline char * head(char * p, UInt u)
{ {
return u < pow10<UnsignedOfSize<N>>(N) ? head(p, UnsignedOfSize<N / 2>(u)) : head<N>(p, split<N>(u)); pair dd[100]{
} 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, //
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
};
pair fd[100]{
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
};
} digits;
// recursion base case, selected when "u" is one byte constexpr UInt64 mask24 = (UInt64(1) << 24) - 1;
template <> constexpr UInt64 mask32 = (UInt64(1) << 32) - 1;
ALWAYS_INLINE inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u) constexpr UInt64 mask57 = (UInt64(1) << 57) - 1;
template <bool, class, class F>
struct _cond
{ {
return u < 10 ? outDigit(p, u) : outTwoDigits(p, u); using type = F;
} };
template <class T, class F>
//===----------------------------------------------------------===// struct _cond<true, T, F>
// tail: produce all digits including leading zeros
//===----------------------------------------------------------===//
// recursive step, "u" is less than 10^2*N
template <typename UInt, size_t N>
ALWAYS_INLINE inline char * tail(char * p, UInt u)
{ {
QuotientAndRemainder<N> x = split<N>(u); using type = T;
p = tail(p, UnsignedOfSize<N / 2>(x.quotient)); };
p = tail(p, x.remainder); template <bool B, class T, class F>
return p; using cond = typename _cond<B, T, F>::type;
}
// recursion base case, selected when "u" is one byte template <class T>
template <> inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i)
ALWAYS_INLINE inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{ {
return outTwoDigits(p, u); constexpr auto q = sizeof(T);
} using U = cond<q == 1, char8_t, cond<q <= sizeof(UInt16), UInt16, cond<q <= sizeof(UInt32), UInt32, UInt64>>>;
//===----------------------------------------------------------===// // convert bool to int before test with unary + to silence warning if T happens to be bool
// large values are >= 10^2*N U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i);
// where x contains quotient and remainder after division by 10^N
//===----------------------------------------------------------===//
template <size_t N>
ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder<N> x)
{
QuotientAndRemainder<N> y = split<N>(x.quotient);
p = head(p, UnsignedOfSize<N / 2>(y.quotient));
p = tail(p, y.remainder);
p = tail(p, x.remainder);
return p;
}
//===----------------------------------------------------------===// if (n < U(1e2))
// handle values of "u" that might be >= 10^2*N {
// where N is the size of "u" in bytes /// This is changed from the original jeaiii implementation
//===----------------------------------------------------------===// /// For small numbers the extra branch to call outOneDigit() is worth it as it saves some instructions
template <typename UInt, size_t N = sizeof(UInt)> /// and a memory access (no need to read digits.fd[n])
ALWAYS_INLINE inline char * uitoa(char * p, UInt u) /// This is not true for pure random numbers, but that's not the common use case of a database
{ /// Original jeaiii code
if (u < pow10<UnsignedOfSize<N>>(N)) // *reinterpret_cast<pair *>(b) = digits.fd[n];
return head(p, UnsignedOfSize<N / 2>(u)); // return n < 10 ? b + 1 : b + 2;
QuotientAndRemainder<N> x = split<N>(u); return n < 10 ? outOneDigit(b, n) : outTwoDigits(b, n);
}
if (n < UInt32(1e6))
{
if (sizeof(U) == 1 || n < U(1e4))
{
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
if constexpr (sizeof(U) == 1)
b -= 1;
else
b -= n < U(1e3);
auto f2 = (f0 & mask24) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
return b + 4;
}
auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
if constexpr (sizeof(U) == 2)
b -= 1;
else
b -= n < U(1e5);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
return b + 6;
}
if (sizeof(U) == 4 || n < UInt64(1ull << 32ull))
{
if (n < U(1e8))
{
auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
b -= n < U(1e7);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
return b + 8;
}
auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * n;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
b -= n < UInt32(1e9);
auto f2 = (f0 & mask57) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
auto f4 = (f2 & mask57) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
auto f6 = (f4 & mask57) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
auto f8 = (f6 & mask57) * 100;
*reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
return b + 10;
}
return u < pow10<UnsignedOfSize<N>>(2 * N) ? head<N>(p, x) : large<N>(p, x); // if we get here U must be UInt64 but some compilers don't know that, so reassign n to a UInt64 to avoid warnings
} UInt32 z = n % UInt32(1e8);
UInt64 u = n / UInt32(1e8);
// selected when "u" is one byte if (u < UInt32(1e2))
template <> {
ALWAYS_INLINE inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u) // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number)
{ *reinterpret_cast<pair *>(b) = digits.dd[u];
if (u < 10) b += 2;
return outDigit(p, u); }
else if (u < 100) else if (u < UInt32(1e6))
return outTwoDigits(p, u); {
if (u < UInt32(1e4))
{
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
b -= u < UInt32(1e3);
auto f2 = (f0 & mask24) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
b += 4;
}
else
{
auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
b -= u < UInt32(1e5);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
b += 6;
}
}
else if (u < UInt32(1e8))
{
auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
b -= u < UInt32(1e7);
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
b += 8;
}
else if (u < UInt64(1ull << 32ull))
{
auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
b -= u < UInt32(1e9);
auto f2 = (f0 & mask57) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
auto f4 = (f2 & mask57) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
auto f6 = (f4 & mask57) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
auto f8 = (f6 & mask57) * 100;
*reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
b += 10;
}
else else
{ {
p = outDigit(p, u / 100); UInt32 y = u % UInt32(1e8);
p = outTwoDigits(p, u % 100); u /= UInt32(1e8);
return p;
// u is 2, 3, or 4 digits (if u < 10 it would have been handled above)
if (u < UInt32(1e2))
{
*reinterpret_cast<pair *>(b) = digits.dd[u];
b += 2;
}
else
{
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
b -= u < UInt32(1e3);
auto f2 = (f0 & mask24) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
b += 4;
}
// do 8 digits
auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1;
*reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
auto f2 = (f0 & mask32) * 100;
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
auto f6 = (f4 & mask32) * 100;
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
b += 8;
} }
} // do 8 digits
auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1;
//===----------------------------------------------------------===// *reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
// handle unsigned and signed integral operands auto f2 = (f0 & mask32) * 100;
//===----------------------------------------------------------===// *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
auto f4 = (f2 & mask32) * 100;
// itoa: handle unsigned integral operands (selected by SFINAE) *reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
template <typename U> auto f6 = (f4 & mask32) * 100;
requires(!std::is_signed_v<U> && std::is_integral_v<U>) *reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
ALWAYS_INLINE inline char * itoa(U u, char * p) return b + 8;
{
return convert::uitoa(p, u);
}
// itoa: handle signed integral operands (selected by SFINAE)
template <typename I, size_t N = sizeof(I)>
requires(std::is_signed_v<I> && std::is_integral_v<I>)
ALWAYS_INLINE inline char * itoa(I i, char * p)
{
// Need "mask" to be filled with a copy of the sign bit.
// If "i" is a negative value, then the result of "operator >>"
// is implementation-defined, though usually it is an arithmetic
// right shift that replicates the sign bit.
// Use a conditional expression to be portable,
// a good optimizing compiler generates an arithmetic right shift
// and avoids the conditional branch.
UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
// Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
// Cannot use std::abs() because the result is undefined
// in 2's complement systems for the most-negative value.
// Want to avoid conditional branch for performance reasons since
// CPU branch prediction will be ineffective when negative values
// occur randomly.
// Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
// Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
// This yields the absolute value with the desired type without
// using a conditional branch and without invoking undefined or
// implementation defined behavior:
UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
// Unconditionally store a minus sign when producing digits
// in a forward direction and increment the pointer only if
// the value is in fact negative.
// This avoids a conditional branch and is safe because we will
// always produce at least one digit and it will overwrite the
// minus sign when the value is not negative.
*p = '-';
p += (mask & 1);
p = convert::uitoa(p, u);
return p;
} }
} }
@ -303,7 +296,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
{ {
/// If the highest 64bit item is empty, we can print just the lowest item as u64 /// If the highest 64bit item is empty, we can print just the lowest item as u64
if (_x.items[UInt128::_impl::little(1)] == 0) if (_x.items[UInt128::_impl::little(1)] == 0)
return convert::itoa(_x.items[UInt128::_impl::little(0)], p); return jeaiii::to_text_from_integer(p, _x.items[UInt128::_impl::little(0)]);
/// Doing operations using __int128 is faster and we already rely on this feature /// Doing operations using __int128 is faster and we already rely on this feature
using T = unsigned __int128; using T = unsigned __int128;
@ -334,7 +327,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
current_block += max_multiple_of_hundred_blocks; current_block += max_multiple_of_hundred_blocks;
} }
char * highest_part_print = convert::itoa(uint64_t(x), p); char * highest_part_print = jeaiii::to_text_from_integer(p, uint64_t(x));
for (int i = 0; i < current_block; i++) for (int i = 0; i < current_block; i++)
{ {
outTwoDigits(highest_part_print, two_values[current_block - 1 - i]); outTwoDigits(highest_part_print, two_values[current_block - 1 - i]);
@ -450,12 +443,12 @@ ALWAYS_INLINE inline char * writeSIntText(T x, char * pos)
char * itoa(UInt8 i, char * p) char * itoa(UInt8 i, char * p)
{ {
return convert::itoa(uint8_t(i), p); return jeaiii::to_text_from_integer(p, uint8_t(i));
} }
char * itoa(Int8 i, char * p) char * itoa(Int8 i, char * p)
{ {
return convert::itoa(int8_t(i), p); return jeaiii::to_text_from_integer(p, int8_t(i));
} }
char * itoa(UInt128 i, char * p) char * itoa(UInt128 i, char * p)
@ -481,7 +474,7 @@ char * itoa(Int256 i, char * p)
#define DEFAULT_ITOA(T) \ #define DEFAULT_ITOA(T) \
char * itoa(T i, char * p) \ char * itoa(T i, char * p) \
{ \ { \
return convert::itoa(i, p); \ return jeaiii::to_text_from_integer(p, i); \
} }
#define FOR_MISSING_INTEGER_TYPES(M) \ #define FOR_MISSING_INTEGER_TYPES(M) \

View File

@ -235,8 +235,6 @@ namespace Net
/// Note that simply closing a socket is not sufficient /// Note that simply closing a socket is not sufficient
/// to be able to re-use it again. /// to be able to re-use it again.
Poco::Timespan getMaxTimeout();
private: private:
SecureSocketImpl(const SecureSocketImpl &); SecureSocketImpl(const SecureSocketImpl &);
SecureSocketImpl & operator=(const SecureSocketImpl &); SecureSocketImpl & operator=(const SecureSocketImpl &);
@ -250,6 +248,9 @@ namespace Net
Session::Ptr _pSession; Session::Ptr _pSession;
friend class SecureStreamSocketImpl; friend class SecureStreamSocketImpl;
Poco::Timespan getMaxTimeoutOrLimit();
/// Returns max(send, receive) if non-zero, otherwise the maximum timeout
}; };

View File

@ -199,7 +199,7 @@ void SecureSocketImpl::connectSSL(bool performHandshake)
if (performHandshake && _pSocket->getBlocking()) if (performHandshake && _pSocket->getBlocking())
{ {
int ret; int ret;
Poco::Timespan remaining_time = getMaxTimeout(); Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
do do
{ {
RemainingTimeCounter counter(remaining_time); RemainingTimeCounter counter(remaining_time);
@ -302,7 +302,7 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
return rc; return rc;
} }
Poco::Timespan remaining_time = getMaxTimeout(); Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
do do
{ {
RemainingTimeCounter counter(remaining_time); RemainingTimeCounter counter(remaining_time);
@ -338,7 +338,7 @@ int SecureSocketImpl::receiveBytes(void* buffer, int length, int flags)
return rc; return rc;
} }
Poco::Timespan remaining_time = getMaxTimeout(); Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
do do
{ {
/// SSL record may consist of several TCP packets, /// SSL record may consist of several TCP packets,
@ -372,7 +372,7 @@ int SecureSocketImpl::completeHandshake()
poco_check_ptr (_pSSL); poco_check_ptr (_pSSL);
int rc; int rc;
Poco::Timespan remaining_time = getMaxTimeout(); Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
do do
{ {
RemainingTimeCounter counter(remaining_time); RemainingTimeCounter counter(remaining_time);
@ -453,18 +453,29 @@ X509* SecureSocketImpl::peerCertificate() const
return 0; return 0;
} }
Poco::Timespan SecureSocketImpl::getMaxTimeout() Poco::Timespan SecureSocketImpl::getMaxTimeoutOrLimit()
{ {
std::lock_guard<std::recursive_mutex> lock(_mutex); std::lock_guard<std::recursive_mutex> lock(_mutex);
Poco::Timespan remaining_time = _pSocket->getReceiveTimeout(); Poco::Timespan remaining_time = _pSocket->getReceiveTimeout();
Poco::Timespan send_timeout = _pSocket->getSendTimeout(); Poco::Timespan send_timeout = _pSocket->getSendTimeout();
if (remaining_time < send_timeout) if (remaining_time < send_timeout)
remaining_time = send_timeout; remaining_time = send_timeout;
/// zero SO_SNDTIMEO/SO_RCVTIMEO works as no timeout, let's replicate this
///
/// NOTE: we cannot use INT64_MAX (std::numeric_limits<Poco::Timespan::TimeDiff>::max()),
/// since it will be later passed to poll() which accepts an int timeout, and
/// even though poll() accepts milliseconds and Timespan() accepts
/// microseconds, let's use smaller maximum value just to avoid some possible
/// issues, this should be enough anyway (it is ~24 days).
if (remaining_time == 0)
remaining_time = Poco::Timespan(std::numeric_limits<int>::max());
return remaining_time; return remaining_time;
} }
bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time) bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
{ {
if (remaining_time == 0)
return false;
std::lock_guard<std::recursive_mutex> lock(_mutex); std::lock_guard<std::recursive_mutex> lock(_mutex);
if (rc <= 0) if (rc <= 0)
{ {
@ -475,9 +486,7 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
case SSL_ERROR_WANT_READ: case SSL_ERROR_WANT_READ:
if (_pSocket->getBlocking()) if (_pSocket->getBlocking())
{ {
/// Level-triggered mode of epoll_wait is used, so if SSL_read don't read all available data from socket, if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ))
/// epoll_wait returns true without waiting for new data even if remaining_time == 0
if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ) && remaining_time != 0)
return true; return true;
else else
throw Poco::TimeoutException(); throw Poco::TimeoutException();
@ -486,13 +495,15 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
case SSL_ERROR_WANT_WRITE: case SSL_ERROR_WANT_WRITE:
if (_pSocket->getBlocking()) if (_pSocket->getBlocking())
{ {
/// The same as for SSL_ERROR_WANT_READ if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE))
if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE) && remaining_time != 0)
return true; return true;
else else
throw Poco::TimeoutException(); throw Poco::TimeoutException();
} }
break; break;
/// NOTE: POCO_EINTR is the same as SSL_ERROR_WANT_READ (at least in
/// OpenSSL), so this is likely dead code, but let's leave it for
/// compatibility with other implementations
case SSL_ERROR_SYSCALL: case SSL_ERROR_SYSCALL:
return socketError == POCO_EAGAIN || socketError == POCO_EINTR; return socketError == POCO_EAGAIN || socketError == POCO_EINTR;
default: default:

View File

@ -253,7 +253,7 @@ function run_tests()
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
set +e set +e
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \ | ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt | tee -a test_output/test_result.txt

View File

@ -993,11 +993,11 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
- `TDigest` - `TDigest`
Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch. [TDigest](https://github.com/tdunning/t-digest) sketches which allow computing approximate percentiles (e.g. the 90th percentile) for numeric columns.
- `Uniq` - `Uniq`
Estimate the number of distinct values of a column by HyperLogLog. [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimate of how many distinct values a column contains.
## Column-level Settings {#column-level-settings} ## Column-level Settings {#column-level-settings}

View File

@ -6,23 +6,30 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.m
<SelfManaged /> <SelfManaged />
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` field of the certificate is used to identify connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration. [SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` or `subjectAltName extension` field of the certificate is used to identify the connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration.
To enable SSL certificate authentication, a list of `Common Name`'s for each ClickHouse user must be specified in the settings file `users.xml`: To enable SSL certificate authentication, a list of `Common Name`'s or `Subject Alt Name`'s for each ClickHouse user must be specified in the settings file `users.xml`:
**Example** **Example**
```xml ```xml
<clickhouse> <clickhouse>
<!-- ... --> <!-- ... -->
<users> <users>
<user_name> <user_name_1>
<ssl_certificates> <ssl_certificates>
<common_name>host.domain.com:example_user</common_name> <common_name>host.domain.com:example_user</common_name>
<common_name>host.domain.com:example_user_dev</common_name> <common_name>host.domain.com:example_user_dev</common_name>
<!-- More names --> <!-- More names -->
</ssl_certificates> </ssl_certificates>
<!-- Other settings --> <!-- Other settings -->
</user_name> </user_name_1>
<user_name_2>
<ssl_certificates>
<subject_alt_name>DNS:host.domain.com</subject_alt_name>
<!-- More names -->
</ssl_certificates>
<!-- Other settings -->
</user_name_2>
</users> </users>
</clickhouse> </clickhouse>
``` ```

View File

@ -2536,7 +2536,7 @@ Possible values:
- 0 — Optimization disabled. - 0 — Optimization disabled.
- 1 — Optimization enabled. - 1 — Optimization enabled.
Default value: `0`. Default value: `1`.
## optimize_trivial_count_query {#optimize-trivial-count-query} ## optimize_trivial_count_query {#optimize-trivial-count-query}

View File

@ -1,24 +1,20 @@
--- ---
slug: /en/sql-reference/data-types/json slug: /en/sql-reference/data-types/object-data-type
sidebar_position: 26 sidebar_position: 26
sidebar_label: JSON sidebar_label: Object Data Type
keywords: [object, data type]
--- ---
# JSON # Object Data Type
:::note :::note
This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).
::: :::
Stores JavaScript Object Notation (JSON) documents in a single column. Stores JavaScript Object Notation (JSON) documents in a single column.
`JSON` is an alias for `Object('json')`. `JSON` is an alias for `Object('json')`.
:::note
The JSON data type is an obsolete feature. Do not use it.
If you want to use it, set `allow_experimental_object_type = 1`.
:::
## Example ## Example
**Example 1** **Example 1**
@ -49,7 +45,7 @@ SELECT o.a, o.b.c, o.b.d[3] FROM json
**Example 2** **Example 2**
To be able to create an ordered `MergeTree` family table the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format: To be able to create an ordered `MergeTree` family table, the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format:
```sql ```sql
CREATE TABLE logs CREATE TABLE logs
@ -69,7 +65,7 @@ FROM file('access.json.gz', JSONAsString)
## Displaying JSON columns ## Displaying JSON columns
When displaying a `JSON` column ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can display the field names as well by setting `output_format_json_named_tuples_as_objects = 1`: When displaying a `JSON` column, ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can also display the field names by setting `output_format_json_named_tuples_as_objects = 1`:
```sql ```sql
SET output_format_json_named_tuples_as_objects = 1 SET output_format_json_named_tuples_as_objects = 1
@ -83,4 +79,5 @@ SELECT * FROM json FORMAT JSONEachRow
## Related Content ## Related Content
- [Using JSON in ClickHouse](/docs/en/integrations/data-formats/json)
- [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) - [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json)

View File

@ -173,7 +173,7 @@ See function [substring](string-functions.md#substring).
## bitTest ## bitTest
Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left. Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. Counting is right-to-left, starting at 0.
**Syntax** **Syntax**
@ -226,7 +226,7 @@ Result:
## bitTestAll ## bitTestAll
Returns result of [logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left. Returns result of [logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. Counting is right-to-left, starting at 0.
The conjunction for bit-wise operations: The conjunction for bit-wise operations:
@ -289,7 +289,7 @@ Result:
## bitTestAny ## bitTestAny
Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left. Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. Counting is right-to-left, starting at 0.
The disjunction for bit-wise operations: The disjunction for bit-wise operations:

View File

@ -3860,3 +3860,138 @@ Result:
└───────────────┘ └───────────────┘
``` ```
## transactionID
Returns the ID of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
:::note
This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
```
<clickhouse>
<allow_experimental_transactions>1</allow_experimental_transactions>
</clickhouse>
```
For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
:::
**Syntax**
```sql
transactionID()
```
**Returned value**
- Returns a tuple consisting of `start_csn`, `local_tid` and `host_id`. [Tuple](../data-types/tuple.md).
- `start_csn`: Global sequential number, the newest commit timestamp that was seen when this transaction began. [UInt64](../data-types/int-uint.md).
- `local_tid`: Local sequential number that is unique for each transaction started by this host within a specific start_csn. [UInt64](../data-types/int-uint.md).
- `host_id`: UUID of the host that has started this transaction. [UUID](../data-types/uuid.md).
**Example**
Query:
```sql
BEGIN TRANSACTION;
SELECT transactionID();
ROLLBACK;
```
Result:
```response
┌─transactionID()────────────────────────────────┐
│ (32,34,'0ee8b069-f2bb-4748-9eae-069c85b5252b') │
└────────────────────────────────────────────────┘
```
## transactionLatestSnapshot
Returns the newest snapshot (Commit Sequence Number) of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback) that is available for reading.
:::note
This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
```
<clickhouse>
<allow_experimental_transactions>1</allow_experimental_transactions>
</clickhouse>
```
For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
:::
**Syntax**
```sql
transactionLatestSnapshot()
```
**Returned value**
- Returns the latest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md)
**Example**
Query:
```sql
BEGIN TRANSACTION;
SELECT transactionLatestSnapshot();
ROLLBACK;
```
Result:
```response
┌─transactionLatestSnapshot()─┐
│ 32 │
└─────────────────────────────┘
```
## transactionOldestSnapshot
Returns the oldest snapshot (Commit Sequence Number) that is visible for some running [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
:::note
This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
```
<clickhouse>
<allow_experimental_transactions>1</allow_experimental_transactions>
</clickhouse>
```
For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
:::
**Syntax**
```sql
transactionOldestSnapshot()
```
**Returned value**
- Returns the oldest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md)
**Example**
Query:
```sql
BEGIN TRANSACTION;
SELECT transactionOldestSnapshot();
ROLLBACK;
```
Result:
```response
┌─transactionOldestSnapshot()─┐
│ 32 │
└─────────────────────────────┘
```

View File

@ -579,7 +579,6 @@ If the length of the UTF-8 byte sequence is different for upper and lower case o
Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
@ -736,7 +735,7 @@ concat(s1, s2, ...)
**Arguments** **Arguments**
At least one value of arbitrary type. Values of arbitrary type.
Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments.

View File

@ -28,6 +28,6 @@ There is an example adding two statistics types to two columns:
ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq; ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq;
``` ```
:::note :::note
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
::: :::

View File

@ -12,7 +12,7 @@ Syntax:
``` sql ``` sql
ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'}]
[[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[VALID UNTIL datetime] [VALID UNTIL datetime]
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]

View File

@ -152,7 +152,7 @@ SELECT * FROM test;
`MATERIALIZED expr` `MATERIALIZED expr`
Materialized expression. Values of such columns are always calculated, they cannot be specified in INSERT queries. Materialized expression. Values of such columns are automatically calculated according to the specified materialized expression when rows are inserted. Values cannot be explicitly specified during `INSERT`s.
Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`. Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`.

View File

@ -12,7 +12,7 @@ Syntax:
``` sql ``` sql
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...] [, name2 [ON CLUSTER cluster_name2] ...]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[VALID UNTIL datetime] [VALID UNTIL datetime]
[IN access_storage_type] [IN access_storage_type]

View File

@ -269,9 +269,9 @@ FROM s3(
## Virtual Columns {#virtual-columns} ## Virtual Columns {#virtual-columns}
- `_path` — Path to the file. Type: `LowCardinality(String)`. - `_path` — Path to the file. Type: `LowCardinality(String)`. In case of an archive, shows the path in the format "{path_to_archive}::{path_to_file_inside_archive}".
- `_file` — Name of the file. Type: `LowCardinality(String)`. - `_file` — Name of the file. Type: `LowCardinality(String)`. In case of an archive, shows the name of the file inside the archive.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of an archive, shows the uncompressed size of the file inside the archive.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Storage Settings {#storage-settings} ## Storage Settings {#storage-settings}

View File

@ -80,8 +80,8 @@ These functions can be used only as a window function.
- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
- `rank()` - Rank the current row within its partition with gaps. - `rank()` - Rank the current row within its partition with gaps.
- `dense_rank()` - Rank the current row within its partition without gaps. - `dense_rank()` - Rank the current row within its partition without gaps.
- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. - `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the previous row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned.
- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. - `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the specified default value is used.
## Examples ## Examples

View File

@ -3,23 +3,30 @@ slug: /ru/operations/external-authenticators/ssl-x509
--- ---
# Аутентификация по сертификату SSL X.509 {#ssl-external-authentication} # Аутентификация по сертификату SSL X.509 {#ssl-external-authentication}
[Опция 'strict'](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) включает обязательную проверку сертификатов входящих соединений в библиотеке `SSL`. В этом случае могут быть установлены только соединения, представившие действительный сертификат. Соединения с недоверенными сертификатами будут отвергнуты. Таким образом, проверка сертификата позволяет однозначно аутентифицировать входящее соединение. Идентификация пользователя осуществляется по полю `Common Name` сертификата. Это позволяет ассоциировать несколько сертификатов с одним и тем же пользователем. Дополнительно, перевыпуск и отзыв сертификата не требуют изменения конфигурации ClickHouse. [Опция 'strict'](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) включает обязательную проверку сертификатов входящих соединений в библиотеке `SSL`. В этом случае могут быть установлены только соединения, представившие действительный сертификат. Соединения с недоверенными сертификатами будут отвергнуты. Таким образом, проверка сертификата позволяет однозначно аутентифицировать входящее соединение. Идентификация пользователя осуществляется по полю `Common Name` или `subjectAltName` сертификата. Это позволяет ассоциировать несколько сертификатов с одним и тем же пользователем. Дополнительно, перевыпуск и отзыв сертификата не требуют изменения конфигурации ClickHouse.
Для включения аутентификации по SSL сертификату, необходимо указать список `Common Name` для каждого пользователя ClickHouse в файле настройки `config.xml`: Для включения аутентификации по SSL сертификату, необходимо указать список `Common Name` или `subjectAltName` для каждого пользователя ClickHouse в файле настройки `config.xml`:
**Example** **Example**
```xml ```xml
<clickhouse> <clickhouse>
<!-- ... --> <!-- ... -->
<users> <users>
<user_name> <user_name_1>
<certificates> <ssl_certificates>
<common_name>host.domain.com:example_user</common_name> <common_name>host.domain.com:example_user</common_name>
<common_name>host.domain.com:example_user_dev</common_name> <common_name>host.domain.com:example_user_dev</common_name>
<!-- More names --> <!-- More names -->
</certificates> </ssl_certificates>
<!-- Other settings --> <!-- Other settings -->
</user_name> </user_name_1>
<user_name_2>
<ssl_certificates>
<subject_alt_name>DNS:host.domain.com</subject_alt_name>
<!-- More names -->
</ssl_certificates>
<!-- Other settings -->
</user_name_2>
</users> </users>
</clickhouse> </clickhouse>
``` ```

View File

@ -2077,7 +2077,7 @@ SELECT * FROM test_table
- 0 — оптимизация отключена. - 0 — оптимизация отключена.
- 1 — оптимизация включена. - 1 — оптимизация включена.
Значение по умолчанию: `0`. Значение по умолчанию: `1`.
## optimize_trivial_count_query {#optimize-trivial-count-query} ## optimize_trivial_count_query {#optimize-trivial-count-query}

View File

@ -13,7 +13,7 @@ sidebar_label: "Пользователь"
``` sql ``` sql
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...] [, name2 [ON CLUSTER cluster_name2] ...]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[DEFAULT ROLE role [,...]] [DEFAULT ROLE role [,...]]
[DEFAULT DATABASE database | NONE] [DEFAULT DATABASE database | NONE]

View File

@ -3,6 +3,7 @@
#include <IO/ReadBufferFromString.h> #include <IO/ReadBufferFromString.h>
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeDateTime64.h> #include <DataTypes/DataTypeDateTime64.h>
#include <Columns/ColumnNullable.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
@ -47,9 +48,17 @@ Chunk ODBCSource::generate()
for (int idx = 0; idx < result.columns(); ++idx) for (int idx = 0; idx < result.columns(); ++idx)
{ {
const auto & sample = description.sample_block.getByPosition(idx); const auto & sample = description.sample_block.getByPosition(idx);
if (!result.is_null(idx)) if (!result.is_null(idx))
insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); {
if (columns[idx]->isNullable())
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
insertValue(column_nullable.getNestedColumn(), removeNullable(sample.type), description.types[idx].first, result, idx);
column_nullable.getNullMapData().emplace_back(0);
}
else
insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx);
}
else else
insertDefaultValue(*columns[idx], *sample.column); insertDefaultValue(*columns[idx], *sample.column);
} }

View File

@ -133,10 +133,6 @@
# include <Server/KeeperTCPHandlerFactory.h> # include <Server/KeeperTCPHandlerFactory.h>
#endif #endif
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#endif
#if USE_AZURE_BLOB_STORAGE #if USE_AZURE_BLOB_STORAGE
# include <azure/storage/common/internal/xml_wrapper.hpp> # include <azure/storage/common/internal/xml_wrapper.hpp>
# include <azure/core/diagnostics/logger.hpp> # include <azure/core/diagnostics/logger.hpp>
@ -176,34 +172,10 @@ namespace ProfileEvents
namespace fs = std::filesystem; namespace fs = std::filesystem;
#if USE_JEMALLOC
/// Reads a boolean jemalloc option through mallctl() and returns it.
/// `name` is the mallctl key to query (the caller in this file passes "opt.background_thread").
/// Throws Poco::SystemException if mallctl() returns a non-zero status.
static bool jemallocOptionEnabled(const char *name)
{
bool value;
size_t size = sizeof(value);
/// Query only: newp is nullptr and newlen is 0, so no new value is passed to mallctl().
if (mallctl(name, reinterpret_cast<void *>(&value), &size, /* newp= */ nullptr, /* newlen= */ 0))
throw Poco::SystemException("mallctl() failed");
return value;
}
#else
static bool jemallocOptionEnabled(const char *) { return false; }
#endif
int mainEntryClickHouseServer(int argc, char ** argv) int mainEntryClickHouseServer(int argc, char ** argv)
{ {
DB::Server app; DB::Server app;
if (jemallocOptionEnabled("opt.background_thread"))
{
LOG_ERROR(&app.logger(),
"jemalloc.background_thread was requested, "
"however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, "
"and also background_thread is not compatible with ClickHouse watchdog "
"(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)");
}
/// Do not fork separate process from watchdog if we attached to terminal. /// Do not fork separate process from watchdog if we attached to terminal.
/// Otherwise it breaks gdb usage. /// Otherwise it breaks gdb usage.
/// Can be overridden by environment variable (cannot use server config at this moment). /// Can be overridden by environment variable (cannot use server config at this moment).

View File

@ -8,6 +8,7 @@
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/SSHWrapper.h> #include <Common/SSHWrapper.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Access/Common/SSLCertificateSubjects.h>
#include "config.h" #include "config.h"
@ -238,7 +239,15 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<GSSAcceptorContext>(auth_data.getKerberosRealm()); throw Authentication::Require<GSSAcceptorContext>(auth_data.getKerberosRealm());
case AuthenticationType::SSL_CERTIFICATE: case AuthenticationType::SSL_CERTIFICATE:
return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName()); for (SSLCertificateSubjects::Type type : {SSLCertificateSubjects::Type::CN, SSLCertificateSubjects::Type::SAN})
{
for (const auto & subject : auth_data.getSSLCertificateSubjects().at(type))
{
if (ssl_certificate_credentials->getSSLCertificateSubjects().at(type).contains(subject))
return true;
}
}
return false;
case AuthenticationType::SSH_KEY: case AuthenticationType::SSH_KEY:
#if USE_SSH #if USE_SSH

View File

@ -15,6 +15,7 @@
#include <boost/algorithm/hex.hpp> #include <boost/algorithm/hex.hpp>
#include <boost/algorithm/string/case_conv.hpp> #include <boost/algorithm/string/case_conv.hpp>
#include <Access/Common/SSLCertificateSubjects.h>
#include "config.h" #include "config.h"
#if USE_SSL #if USE_SSL
@ -107,7 +108,7 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
{ {
return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash) return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash)
&& (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm) && (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm)
&& (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names) && (lhs.ssl_certificate_subjects == rhs.ssl_certificate_subjects)
#if USE_SSH #if USE_SSH
&& (lhs.ssh_keys == rhs.ssh_keys) && (lhs.ssh_keys == rhs.ssh_keys)
#endif #endif
@ -277,11 +278,16 @@ String AuthenticationData::getSalt() const
return salt; return salt;
} }
void AuthenticationData::setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_) void AuthenticationData::setSSLCertificateSubjects(SSLCertificateSubjects && ssl_certificate_subjects_)
{ {
if (common_names_.empty()) if (ssl_certificate_subjects_.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 'SSL CERTIFICATE' authentication type requires a non-empty list of common names."); throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 'SSL CERTIFICATE' authentication type requires a non-empty list of subjects.");
ssl_certificate_common_names = std::move(common_names_); ssl_certificate_subjects = std::move(ssl_certificate_subjects_);
}
void AuthenticationData::addSSLCertificateSubject(SSLCertificateSubjects::Type type_, String && subject_)
{
ssl_certificate_subjects.insert(type_, std::move(subject_));
} }
std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
@ -339,7 +345,14 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
} }
case AuthenticationType::SSL_CERTIFICATE: case AuthenticationType::SSL_CERTIFICATE:
{ {
for (const auto & name : getSSLCertificateCommonNames()) using SSLCertificateSubjects::Type::CN;
using SSLCertificateSubjects::Type::SAN;
const auto &subjects = getSSLCertificateSubjects();
SSLCertificateSubjects::Type cert_subject_type = !subjects.at(SAN).empty() ? SAN : CN;
node->ssl_cert_subject_type = toString(cert_subject_type);
for (const auto & name : getSSLCertificateSubjects().at(cert_subject_type))
node->children.push_back(std::make_shared<ASTLiteral>(name)); node->children.push_back(std::make_shared<ASTLiteral>(name));
break; break;
@ -513,11 +526,9 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
} }
else if (query.type == AuthenticationType::SSL_CERTIFICATE) else if (query.type == AuthenticationType::SSL_CERTIFICATE)
{ {
boost::container::flat_set<String> common_names; auto ssl_cert_subject_type = parseSSLCertificateSubjectType(*query.ssl_cert_subject_type);
for (const auto & arg : args) for (const auto & arg : args)
common_names.insert(checkAndGetLiteralArgument<String>(arg, "common_name")); auth_data.addSSLCertificateSubject(ssl_cert_subject_type, checkAndGetLiteralArgument<String>(arg, "ssl_certificate_subject"));
auth_data.setSSLCertificateCommonNames(std::move(common_names));
} }
else if (query.type == AuthenticationType::HTTP) else if (query.type == AuthenticationType::HTTP)
{ {

View File

@ -2,13 +2,14 @@
#include <Access/Common/AuthenticationType.h> #include <Access/Common/AuthenticationType.h>
#include <Access/Common/HTTPAuthenticationScheme.h> #include <Access/Common/HTTPAuthenticationScheme.h>
#include <Access/Common/SSLCertificateSubjects.h>
#include <Common/SSHWrapper.h> #include <Common/SSHWrapper.h>
#include <Interpreters/Context_fwd.h> #include <Interpreters/Context_fwd.h>
#include <Parsers/Access/ASTAuthenticationData.h> #include <Parsers/Access/ASTAuthenticationData.h>
#include <vector> #include <vector>
#include <base/types.h> #include <base/types.h>
#include <boost/container/flat_set.hpp>
#include "config.h" #include "config.h"
@ -58,8 +59,9 @@ public:
const String & getKerberosRealm() const { return kerberos_realm; } const String & getKerberosRealm() const { return kerberos_realm; }
void setKerberosRealm(const String & realm) { kerberos_realm = realm; } void setKerberosRealm(const String & realm) { kerberos_realm = realm; }
const boost::container::flat_set<String> & getSSLCertificateCommonNames() const { return ssl_certificate_common_names; } const SSLCertificateSubjects & getSSLCertificateSubjects() const { return ssl_certificate_subjects; }
void setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_); void setSSLCertificateSubjects(SSLCertificateSubjects && ssl_certificate_subjects_);
void addSSLCertificateSubject(SSLCertificateSubjects::Type type_, String && subject_);
#if USE_SSH #if USE_SSH
const std::vector<SSHKey> & getSSHKeys() const { return ssh_keys; } const std::vector<SSHKey> & getSSHKeys() const { return ssh_keys; }
@ -96,7 +98,7 @@ private:
Digest password_hash; Digest password_hash;
String ldap_server_name; String ldap_server_name;
String kerberos_realm; String kerberos_realm;
boost::container::flat_set<String> ssl_certificate_common_names; SSLCertificateSubjects ssl_certificate_subjects;
String salt; String salt;
#if USE_SSH #if USE_SSH
std::vector<SSHKey> ssh_keys; std::vector<SSHKey> ssh_keys;

View File

@ -0,0 +1,95 @@
#include <Access/Common/SSLCertificateSubjects.h>
#include <Common/Exception.h>
#if USE_SSL
#include <openssl/x509v3.h>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
#if USE_SSL
/// Collects the subjects of `certificate`:
///  - its common name, stored under Type::CN when it is not empty;
///  - every DNS and URI entry of the subjectAltName extension, stored under Type::SAN
///    with a "DNS:" or "URI:" prefix. Other kinds of alternative names are ignored.
SSLCertificateSubjects extractSSLCertificateSubjects(const Poco::Net::X509Certificate & certificate)
{
    SSLCertificateSubjects result;

    if (auto common_name = certificate.commonName(); !common_name.empty())
        result.insert(SSLCertificateSubjects::Type::CN, std::move(common_name));

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wused-but-marked-unused"
    /// The decoded extension is owned by us and has to be released with GENERAL_NAMES_free.
    auto free_general_names = [](void * ptr) { GENERAL_NAMES_free(static_cast<STACK_OF(GENERAL_NAME) *>(ptr)); };
    std::unique_ptr<void, decltype(free_general_names)> san_extension(
        X509_get_ext_d2i(const_cast<X509 *>(certificate.certificate()), NID_subject_alt_name, nullptr, nullptr),
        free_general_names);

    /// A null pointer means that no subjectAltName list was obtained: nothing to add then.
    auto * names = static_cast<STACK_OF(GENERAL_NAME) *>(san_extension.get());
    const int names_count = names ? sk_GENERAL_NAME_num(names) : 0;

    for (int i = 0; i < names_count; ++i)
    {
        const GENERAL_NAME * name = sk_GENERAL_NAME_value(names, i);

        const bool is_dns = name->type == GEN_DNS;
        if (!is_dns && name->type != GEN_URI)
            continue;

        const auto * raw = reinterpret_cast<const char *>(ASN1_STRING_get0_data(name->d.ia5));
        const std::size_t length = ASN1_STRING_length(name->d.ia5);

        std::string subject = is_dns ? "DNS:" : "URI:";
        subject.append(raw, length);
        result.insert(SSLCertificateSubjects::Type::SAN, std::move(subject));
    }
#pragma clang diagnostic pop

    return result;
}
#endif
void SSLCertificateSubjects::insert(const String & subject_type_, String && subject)
{
insert(parseSSLCertificateSubjectType(subject_type_), std::move(subject));
}
/// Adds `subject` to the set kept for `subject_type_`; the enum value is used as the array index.
void SSLCertificateSubjects::insert(Type subject_type_, String && subject)
{
    auto & subjects_of_type = subjects[static_cast<size_t>(subject_type_)];
    subjects_of_type.insert(std::move(subject));
}
/// Converts the textual subject type ("CN" or "SAN", case-sensitive) to the enum value.
/// Throws BAD_ARGUMENTS for any other string.
SSLCertificateSubjects::Type parseSSLCertificateSubjectType(const String & type_)
{
    const bool is_common_name = type_ == "CN";
    if (!is_common_name && type_ != "SAN")
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown SSL Certificate Subject Type: {}", type_);

    return is_common_name ? SSLCertificateSubjects::Type::CN : SSLCertificateSubjects::Type::SAN;
}
/// Returns the textual name of the subject type ("CN" or "SAN"); the inverse of parseSSLCertificateSubjectType.
/// Throws BAD_ARGUMENTS if `type_` holds a value that is not one of the enumerators.
String toString(SSLCertificateSubjects::Type type_)
{
    switch (type_)
    {
        case SSLCertificateSubjects::Type::CN:
            return "CN";
        case SSLCertificateSubjects::Type::SAN:
            return "SAN";
    }

    /// An enum object can hold a value outside its enumerators (e.g. after a cast).
    /// Without this statement control would flow off the end of a non-void function, which is undefined behaviour.
    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown SSL Certificate Subject Type: {}", static_cast<size_t>(type_));
}
/// Two subject collections are equal when both their CN sets and their SAN sets are equal.
bool operator==(const SSLCertificateSubjects & lhs, const SSLCertificateSubjects & rhs)
{
    using Type = SSLCertificateSubjects::Type;

    const bool same_common_names = lhs.at(Type::CN) == rhs.at(Type::CN);
    return same_common_names && lhs.at(Type::SAN) == rhs.at(Type::SAN);
}
}

View File

@ -0,0 +1,48 @@
#pragma once
#include "config.h"
#include <base/types.h>
#include <boost/container/flat_set.hpp>
#if USE_SSL
# include <Poco/Net/X509Certificate.h>
#endif
namespace DB
{
/// Subjects of an SSL certificate, grouped by subject type.
/// Each type has its own set of strings; a set may be empty.
class SSLCertificateSubjects
{
public:
    using container = boost::container::flat_set<String>;

    /// Kind of subject. The numeric values are used as indexes into `subjects`,
    /// whose size is derived from `SAN`, so `SAN` has to stay the last enumerator.
    enum class Type
    {
        CN,
        SAN
    };

private:
    /// One set of subjects per Type, indexed by the numeric value of the enumerator.
    std::array<container, size_t(Type::SAN) + 1> subjects;

public:
    /// Returns the set of subjects stored for `type_`.
    const container & at(Type type_) const { return subjects[static_cast<size_t>(type_)]; }

    /// Returns true when no subject of any type is stored.
    /// It is a read-only query, so it is const and can be called on const objects as well.
    bool empty() const
    {
        for (const auto & subject_list : subjects)
        {
            if (!subject_list.empty())
                return false;
        }
        return true;
    }

    /// Adds `subject` under the type spelled by `subject_type_` (parsed with parseSSLCertificateSubjectType).
    void insert(const String & subject_type_, String && subject);

    /// Adds `subject` to the set kept for `type_`.
    void insert(Type type_, String && subject);

    friend bool operator==(const SSLCertificateSubjects & lhs, const SSLCertificateSubjects & rhs);
};
String toString(SSLCertificateSubjects::Type type_);
SSLCertificateSubjects::Type parseSSLCertificateSubjectType(const String & type_);
#if USE_SSL
SSLCertificateSubjects extractSSLCertificateSubjects(const Poco::Net::X509Certificate & certificate);
#endif
}

View File

@ -1,7 +1,7 @@
#include <Access/Credentials.h> #include <Access/Credentials.h>
#include <Access/Common/SSLCertificateSubjects.h>
#include <Common/Exception.h> #include <Common/Exception.h>
namespace DB namespace DB
{ {
@ -48,18 +48,18 @@ void AlwaysAllowCredentials::setUserName(const String & user_name_)
user_name = user_name_; user_name = user_name_;
} }
SSLCertificateCredentials::SSLCertificateCredentials(const String & user_name_, const String & common_name_) SSLCertificateCredentials::SSLCertificateCredentials(const String & user_name_, SSLCertificateSubjects && subjects_)
: Credentials(user_name_) : Credentials(user_name_)
, common_name(common_name_) , certificate_subjects(subjects_)
{ {
is_ready = true; is_ready = true;
} }
const String & SSLCertificateCredentials::getCommonName() const const SSLCertificateSubjects & SSLCertificateCredentials::getSSLCertificateSubjects() const
{ {
if (!isReady()) if (!isReady())
throwNotReady(); throwNotReady();
return common_name; return certificate_subjects;
} }
BasicCredentials::BasicCredentials() BasicCredentials::BasicCredentials()

View File

@ -1,6 +1,8 @@
#pragma once #pragma once
#include <base/types.h> #include <base/types.h>
#include <boost/container/flat_set.hpp>
#include <Access/Common/SSLCertificateSubjects.h>
#include <memory> #include <memory>
#include "config.h" #include "config.h"
@ -42,11 +44,11 @@ class SSLCertificateCredentials
: public Credentials : public Credentials
{ {
public: public:
explicit SSLCertificateCredentials(const String & user_name_, const String & common_name_); explicit SSLCertificateCredentials(const String & user_name_, SSLCertificateSubjects && subjects_);
const String & getCommonName() const; const SSLCertificateSubjects & getSSLCertificateSubjects() const;
private: private:
String common_name; SSLCertificateSubjects certificate_subjects;
}; };
class BasicCredentials class BasicCredentials

View File

@ -1,4 +1,5 @@
#include <Access/UsersConfigAccessStorage.h> #include <Access/UsersConfigAccessStorage.h>
#include <Access/Common/SSLCertificateSubjects.h>
#include <Access/Quota.h> #include <Access/Quota.h>
#include <Access/RowPolicy.h> #include <Access/RowPolicy.h>
#include <Access/User.h> #include <Access/User.h>
@ -194,18 +195,23 @@ namespace
/// Fill list of allowed certificates. /// Fill list of allowed certificates.
Poco::Util::AbstractConfiguration::Keys keys; Poco::Util::AbstractConfiguration::Keys keys;
config.keys(certificates_config, keys); config.keys(certificates_config, keys);
boost::container::flat_set<String> common_names;
for (const String & key : keys) for (const String & key : keys)
{ {
if (key.starts_with("common_name")) if (key.starts_with("common_name"))
{ {
String value = config.getString(certificates_config + "." + key); String value = config.getString(certificates_config + "." + key);
common_names.insert(std::move(value)); user->auth_data.addSSLCertificateSubject(SSLCertificateSubjects::Type::CN, std::move(value));
}
else if (key.starts_with("subject_alt_name"))
{
String value = config.getString(certificates_config + "." + key);
if (value.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected ssl_certificates.subject_alt_name to not be empty");
user->auth_data.addSSLCertificateSubject(SSLCertificateSubjects::Type::SAN, std::move(value));
} }
else else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown certificate pattern type: {}", key); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown certificate pattern type: {}", key);
} }
user->auth_data.setSSLCertificateCommonNames(std::move(common_names));
} }
else if (has_ssh_keys) else if (has_ssh_keys)
{ {

View File

@ -9,6 +9,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h> #include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h> #include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB namespace DB
{ {
@ -164,32 +165,15 @@ private:
auto aggregate_function_clone = aggregate_function->clone(); auto aggregate_function_clone = aggregate_function->clone();
auto & aggregate_function_clone_typed = aggregate_function_clone->as<FunctionNode &>(); auto & aggregate_function_clone_typed = aggregate_function_clone->as<FunctionNode &>();
aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument };
resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name); resolveAggregateFunctionNodeByName(aggregate_function_clone_typed, result_aggregate_function_name);
arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone); arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone);
resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName()); resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext());
return arithmetic_function_clone; return arithmetic_function_clone;
} }
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
{
auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get(
aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
}; };
} }

View File

@ -11,6 +11,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h> #include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h> #include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB namespace DB
{ {
@ -18,19 +19,18 @@ namespace DB
namespace namespace
{ {
class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor<ComparisonTupleEliminationPassVisitor> class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitorWithContext<ComparisonTupleEliminationPassVisitor>
{ {
public: public:
explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_) using Base = InDepthQueryTreeVisitorWithContext<ComparisonTupleEliminationPassVisitor>;
: context(std::move(context_)) using Base::Base;
{}
static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child) static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child)
{ {
return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION; return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION;
} }
void visitImpl(QueryTreeNodePtr & node) const void enterImpl(QueryTreeNodePtr & node) const
{ {
auto * function_node = node->as<FunctionNode>(); auto * function_node = node->as<FunctionNode>();
if (!function_node) if (!function_node)
@ -171,13 +171,13 @@ private:
{ {
auto result_function = std::make_shared<FunctionNode>("and"); auto result_function = std::make_shared<FunctionNode>("and");
result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions); result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions);
resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName()); resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), getContext());
if (comparison_function_name == "notEquals") if (comparison_function_name == "notEquals")
{ {
auto not_function = std::make_shared<FunctionNode>("not"); auto not_function = std::make_shared<FunctionNode>("not");
not_function->getArguments().getNodes().push_back(std::move(result_function)); not_function->getArguments().getNodes().push_back(std::move(result_function));
resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName()); resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), getContext());
result_function = std::move(not_function); result_function = std::move(not_function);
} }
@ -197,18 +197,10 @@ private:
comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument)); comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument));
comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument)); comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument));
resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName()); resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), getContext());
return comparison_function; return comparison_function;
} }
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, context);
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
ContextPtr context;
}; };
} }

View File

@ -9,6 +9,7 @@
#include <Analyzer/ColumnNode.h> #include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Analyzer/QueryNode.h> #include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB namespace DB
{ {
@ -77,11 +78,9 @@ public:
/// Replace `countDistinct` of initial query into `count` /// Replace `countDistinct` of initial query into `count`
auto result_type = function_node->getResultType(); auto result_type = function_node->getResultType();
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
function_node->getArguments().getNodes().clear(); function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
} }
}; };

View File

@ -4,6 +4,7 @@
#include <DataTypes/DataTypeTuple.h> #include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h> #include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeVariant.h>
#include <Storages/IStorage.h> #include <Storages/IStorage.h>
@ -16,6 +17,9 @@
#include <Analyzer/ColumnNode.h> #include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Analyzer/TableNode.h> #include <Analyzer/TableNode.h>
#include <Analyzer/TableFunctionNode.h>
#include <Analyzer/Utils.h>
#include <Analyzer/JoinNode.h>
namespace DB namespace DB
{ {
@ -23,202 +27,410 @@ namespace DB
namespace namespace
{ {
class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor> struct ColumnContext
{
NameAndTypePair column;
QueryTreeNodePtr column_source;
ContextPtr context;
};
using NodeToSubcolumnTransformer = std::function<void(QueryTreeNodePtr &, FunctionNode &, ColumnContext &)>;
/// Rewrites `length(argument)` into a direct read of the `argument.size0` subcolumn (type UInt64).
/// It is registered in `node_transformers` for Array and Map arguments.
void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx)
{
    auto size_subcolumn_type = std::make_shared<DataTypeUInt64>();
    NameAndTypePair size_subcolumn{ctx.column.name + ".size0", std::move(size_subcolumn_type)};

    /// The whole function node is replaced by the subcolumn node.
    node = std::make_shared<ColumnNode>(size_subcolumn, ctx.column_source);
}
/// Rewrites an emptiness check into a comparison of the `size0` subcolumn with zero:
///  - `empty(argument)`    -> `equals(argument.size0, 0)`    when `positive` is true;
///  - `notEmpty(argument)` -> `notEquals(argument.size0, 0)` when `positive` is false.
/// It is registered in `node_transformers` for Array and Map arguments.
/// The function node is re-resolved in place, so `node` itself is not replaced.
template <bool positive>
void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
{
    NameAndTypePair size_subcolumn{ctx.column.name + ".size0", std::make_shared<DataTypeUInt64>()};

    auto & arguments = function_node.getArguments().getNodes();
    arguments.clear();
    arguments.push_back(std::make_shared<ColumnNode>(size_subcolumn, ctx.column_source));
    arguments.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));

    if constexpr (positive)
        resolveOrdinaryFunctionNodeByName(function_node, "equals", ctx.context);
    else
        resolveOrdinaryFunctionNodeByName(function_node, "notEquals", ctx.context);
}
/// Returns the name of the tuple subcolumn selected by `value`:
/// a String is taken as the element name itself, a UInt64 is translated with getNameByPosition.
/// Any other kind of value gives an empty string, which callers treat as "do not optimize".
String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple)
{
    const auto value_type = value.getType();

    if (value_type == Field::Types::UInt64)
        return data_type_tuple.getNameByPosition(value.get<UInt64>());

    if (value_type != Field::Types::String)
        return "";

    return value.get<const String &>();
}
/// Returns the name of the variant subcolumn selected by `value`.
/// Only a String selector is accepted; anything else gives an empty string ("do not optimize").
String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &)
{
    const bool is_string = value.getType() == Field::Types::String;
    return is_string ? value.get<const String &>() : String{};
}
/// Rewrites an element access into a read of the corresponding subcolumn:
///  - `tupleElement(tuple_argument, string_literal)` or `tupleElement(tuple_argument, integer_literal)` -> `tuple_argument.column_name`;
///  - `variantElement(variant_argument, string_literal)` -> `variant_argument.column_name`.
/// `DataType` is the concrete type of the column (DataTypeTuple or DataTypeVariant).
/// The node is left untouched unless there are exactly two arguments, the second one is a constant,
/// and a subcolumn name can be derived from it.
template <typename DataType>
void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
{
    const auto & arguments = function_node.getArguments().getNodes();
    if (arguments.size() != 2)
        return;

    const auto * element_selector = arguments[1]->as<ConstantNode>();
    if (!element_selector)
        return;

    const auto subcolumn_name
        = getSubcolumnNameForElement(element_selector->getValue(), assert_cast<const DataType &>(*ctx.column.type));
    if (subcolumn_name.empty())
        return;

    /// The subcolumn keeps the result type of the function it replaces.
    NameAndTypePair subcolumn{ctx.column.name + "." + subcolumn_name, function_node.getResultType()};
    node = std::make_shared<ColumnNode>(subcolumn, ctx.column_source);
}
/// Registry of the supported rewrites, keyed by (type id of the column passed as the first argument, function name).
/// A transformer either replaces `node` with a subcolumn node or re-resolves the function node in place
/// over a subcolumn argument.
/// The table is declared const: the visitors only look entries up (`contains` / `find`),
/// so it must not be modifiable after initialization.
const std::map<std::pair<TypeIndex, String>, NodeToSubcolumnTransformer> node_transformers =
{
    {
        {TypeIndex::Array, "length"}, optimizeFunctionLength,
    },
    {
        {TypeIndex::Array, "empty"}, optimizeFunctionEmpty<true>,
    },
    {
        {TypeIndex::Array, "notEmpty"}, optimizeFunctionEmpty<false>,
    },
    {
        {TypeIndex::Map, "length"}, optimizeFunctionLength,
    },
    {
        {TypeIndex::Map, "empty"}, optimizeFunctionEmpty<true>,
    },
    {
        {TypeIndex::Map, "notEmpty"}, optimizeFunctionEmpty<false>,
    },
    {
        {TypeIndex::Map, "mapKeys"},
        [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
        {
            /// Replace `mapKeys(map_argument)` with `map_argument.keys`
            NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()};
            node = std::make_shared<ColumnNode>(column, ctx.column_source);
        },
    },
    {
        {TypeIndex::Map, "mapValues"},
        [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
        {
            /// Replace `mapValues(map_argument)` with `map_argument.values`
            NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()};
            node = std::make_shared<ColumnNode>(column, ctx.column_source);
        },
    },
    {
        {TypeIndex::Map, "mapContains"},
        [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
        {
            /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
            const auto & data_type_map = assert_cast<const DataTypeMap &>(*ctx.column.type);

            NameAndTypePair column{ctx.column.name + ".keys", std::make_shared<DataTypeArray>(data_type_map.getKeyType())};
            auto & function_arguments_nodes = function_node.getArguments().getNodes();

            /// Only the first argument (the map) is replaced; the searched key stays as it is.
            auto has_function_argument = std::make_shared<ColumnNode>(column, ctx.column_source);
            function_arguments_nodes[0] = std::move(has_function_argument);

            resolveOrdinaryFunctionNodeByName(function_node, "has", ctx.context);
        },
    },
    {
        {TypeIndex::Nullable, "count"},
        [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
        {
            /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))`
            NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
            auto & function_arguments_nodes = function_node.getArguments().getNodes();

            auto new_column_node = std::make_shared<ColumnNode>(column, ctx.column_source);
            auto function_node_not = std::make_shared<FunctionNode>("not");

            function_node_not->getArguments().getNodes().push_back(std::move(new_column_node));
            resolveOrdinaryFunctionNodeByName(*function_node_not, "not", ctx.context);

            function_arguments_nodes = {std::move(function_node_not)};
            resolveAggregateFunctionNodeByName(function_node, "sum");
        },
    },
    {
        {TypeIndex::Nullable, "isNull"},
        [](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx)
        {
            /// Replace `isNull(nullable_argument)` with `nullable_argument.null`
            NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
            node = std::make_shared<ColumnNode>(column, ctx.column_source);
        },
    },
    {
        {TypeIndex::Nullable, "isNotNull"},
        [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
        {
            /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)`
            NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
            auto & function_arguments_nodes = function_node.getArguments().getNodes();

            function_arguments_nodes = {std::make_shared<ColumnNode>(column, ctx.column_source)};
            resolveOrdinaryFunctionNodeByName(function_node, "not", ctx.context);
        },
    },
    {
        {TypeIndex::Tuple, "tupleElement"}, optimizeTupleOrVariantElement<DataTypeTuple>,
    },
    {
        {TypeIndex::Variant, "variantElement"}, optimizeTupleOrVariantElement<DataTypeVariant>,
    },
};
std::tuple<FunctionNode *, ColumnNode *, TableNode *> getTypedNodesForOptimization(const QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return {};
auto & function_arguments_nodes = function_node->getArguments().getNodes();
if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2)
return {};
auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
if (!first_argument_column_node || first_argument_column_node->getColumnName() == "__grouping_set")
return {};
auto column_source = first_argument_column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return {};
const auto & storage = table_node->getStorage();
const auto & storage_snapshot = table_node->getStorageSnapshot();
auto column = first_argument_column_node->getColumn();
if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata))
return {};
auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name);
if (!column_in_table || !column_in_table->type->equals(*column.type))
return {};
return std::make_tuple(function_node, first_argument_column_node, table_node);
}
/// First pass collects info about identifiers to determine which identifiers are allowed to optimize.
class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>
{ {
public: public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>; using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>;
using Base::Base; using Base::Base;
void enterImpl(const QueryTreeNodePtr & node)
{
if (!getSettings().optimize_functions_to_subcolumns)
return;
if (auto * table_node = node->as<TableNode>())
{
enterImpl(*table_node);
return;
}
if (auto * column_node = node->as<ColumnNode>())
{
enterImpl(*column_node);
return;
}
auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node);
if (function_node && first_argument_node && table_node)
{
enterImpl(*function_node, *first_argument_node, *table_node);
return;
}
if (const auto * join_node = node->as<JoinNode>())
{
can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().join_use_nulls;
return;
}
if (const auto * query_node = node->as<QueryNode>())
{
if (query_node->isGroupByWithCube() || query_node->isGroupByWithRollup() || query_node->isGroupByWithGroupingSets())
can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().group_by_use_nulls;
return;
}
}
std::unordered_set<Identifier> getIdentifiersToOptimize() const
{
if (can_wrap_result_columns_with_nullable)
{
/// Do not optimize if we have JOIN with setting join_use_null.
/// Do not optimize if we have GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with setting group_by_use_nulls.
/// It may change the behaviour if subcolumn can be converted
/// to Nullable while the original column cannot (e.g. for Array type).
return {};
}
/// Do not optimize if full column is requested in other context.
/// It doesn't make sense because it doesn't reduce amount of read data
/// and optimized functions are not computation heavy. But introducing
/// new identifier complicates query analysis and may break it.
///
/// E.g. query:
/// SELECT n FROM table GROUP BY n HAVING isNotNull(n)
/// may be optimized to incorrect query:
/// SELECT n FROM table GROUP BY n HAVING not(n.null)
/// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys)
///
/// Do not optimize index columns (primary, min-max, secondary),
/// because otherwise analysis of indexes may be broken.
/// TODO: handle subcolumns in index analysis.
std::unordered_set<Identifier> identifiers_to_optimize;
for (const auto & [identifier, count] : optimized_identifiers_count)
{
if (all_key_columns.contains(identifier))
continue;
auto it = identifiers_count.find(identifier);
if (it != identifiers_count.end() && it->second == count)
identifiers_to_optimize.insert(identifier);
}
return identifiers_to_optimize;
}
private:
std::unordered_set<Identifier> all_key_columns;
std::unordered_map<Identifier, UInt64> identifiers_count;
std::unordered_map<Identifier, UInt64> optimized_identifiers_count;
NameSet processed_tables;
bool can_wrap_result_columns_with_nullable = false;
/// Collects the key columns of the table behind `table_node` into `all_key_columns`:
/// the columns required for the primary key, for the partition key and for every secondary index.
/// getIdentifiersToOptimize() excludes these columns from the optimization.
void enterImpl(const TableNode & table_node)
{
    auto table_name = table_node.getStorage()->getStorageID().getFullTableName();

    /// `emplace(...).second` is true only when the name has just been inserted,
    /// i.e. when the table is seen for the first time. Skip the table only if it was
    /// already processed; otherwise the key columns of a table that occurs once
    /// in the query would never be collected.
    if (!processed_tables.emplace(table_name).second)
        return;

    auto add_key_columns = [&](const auto & key_columns)
    {
        for (const auto & column_name : key_columns)
        {
            Identifier identifier({table_name, column_name});
            all_key_columns.insert(identifier);
        }
    };

    const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata;

    const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey();
    const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey();

    add_key_columns(primary_key_columns);
    add_key_columns(partition_key_columns);

    for (const auto & index : metadata_snapshot->getSecondaryIndices())
    {
        const auto & index_columns = index.expression->getRequiredColumns();
        add_key_columns(index_columns);
    }
}
void enterImpl(const ColumnNode & column_node)
{
if (column_node.getColumnName() == "__grouping_set")
return;
auto column_source = column_node.getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return;
auto table_name = table_node->getStorage()->getStorageID().getFullTableName();
Identifier qualified_name({table_name, column_node.getColumnName()});
++identifiers_count[qualified_name];
}
void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node)
{
/// For queries with FINAL converting function to subcolumn may alter
/// special merging algorithms and produce wrong result of query.
if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal())
return;
const auto & column = first_argument_column_node.getColumn();
auto table_name = table_node.getStorage()->getStorageID().getFullTableName();
Identifier qualified_name({table_name, column.name});
if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()}))
++optimized_identifiers_count[qualified_name];
}
};
/// Second pass optimizes functions to subcolumns for allowed identifiers.
class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>
{
private:
std::unordered_set<Identifier> identifiers_to_optimize;
public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>;
using Base::Base;
/// Builds the visitor from a context and the set of identifiers to optimize.
/// Both arguments are taken by value and moved into the base class / member.
FunctionToSubcolumnsVisitorSecondPass(ContextPtr context_, std::unordered_set<Identifier> identifiers_to_optimize_)
    : Base(std::move(context_))
    , identifiers_to_optimize(std::move(identifiers_to_optimize_))
{}
void enterImpl(QueryTreeNodePtr & node) const void enterImpl(QueryTreeNodePtr & node) const
{ {
if (!getSettings().optimize_functions_to_subcolumns) if (!getSettings().optimize_functions_to_subcolumns)
return; return;
auto * function_node = node->as<FunctionNode>(); auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node);
if (!function_node) if (!function_node || !first_argument_column_node || !table_node)
return;
auto & function_arguments_nodes = function_node->getArguments().getNodes();
size_t function_arguments_nodes_size = function_arguments_nodes.size();
if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2)
return;
auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
if (!first_argument_column_node)
return;
if (first_argument_column_node->getColumnName() == "__grouping_set")
return;
auto column_source = first_argument_column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return;
const auto & storage = table_node->getStorage();
if (!storage->supportsSubcolumns())
return; return;
auto column = first_argument_column_node->getColumn(); auto column = first_argument_column_node->getColumn();
WhichDataType column_type(column.type); auto table_name = table_node->getStorage()->getStorageID().getFullTableName();
const auto & function_name = function_node->getFunctionName(); Identifier qualified_name({table_name, column.name});
if (!identifiers_to_optimize.contains(qualified_name))
return;
if (function_arguments_nodes_size == 1) auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()});
if (transformer_it != node_transformers.end())
{ {
if (column_type.isArray()) ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), getContext()};
{ transformer_it->second(node, *function_node, ctx);
if (function_name == "length")
{
/// Replace `length(array_argument)` with `array_argument.size0`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "empty")
{
/// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
function_arguments_nodes.clear();
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
resolveOrdinaryFunctionNode(*function_node, "equals");
}
else if (function_name == "notEmpty")
{
/// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
function_arguments_nodes.clear();
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
resolveOrdinaryFunctionNode(*function_node, "notEquals");
}
}
else if (column_type.isNullable())
{
if (function_name == "isNull")
{
/// Replace `isNull(nullable_argument)` with `nullable_argument.null`
column.name += ".null";
column.type = std::make_shared<DataTypeUInt8>();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "isNotNull")
{
/// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)`
column.name += ".null";
column.type = std::make_shared<DataTypeUInt8>();
function_arguments_nodes = {std::make_shared<ColumnNode>(column, column_source)};
resolveOrdinaryFunctionNode(*function_node, "not");
}
}
else if (column_type.isMap())
{
if (function_name == "mapKeys")
{
/// Replace `mapKeys(map_argument)` with `map_argument.keys`
column.name += ".keys";
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapValues")
{
/// Replace `mapValues(map_argument)` with `map_argument.values`
column.name += ".values";
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
}
} }
else
{
const auto * second_argument_constant_node = function_arguments_nodes[1]->as<ConstantNode>();
if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node)
{
/** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)`
* with `tuple_argument.column_name`.
*/
const auto & tuple_element_constant_value = second_argument_constant_node->getValue();
const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType();
const auto & data_type_tuple = assert_cast<const DataTypeTuple &>(*column.type);
String subcolumn_name;
if (tuple_element_constant_value_type == Field::Types::String)
{
subcolumn_name = tuple_element_constant_value.get<const String &>();
}
else if (tuple_element_constant_value_type == Field::Types::UInt64)
{
auto tuple_column_index = tuple_element_constant_value.get<UInt64>();
subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index);
}
else
{
return;
}
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node)
{
/// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`.
const auto & variant_element_constant_value = second_argument_constant_node->getValue();
String subcolumn_name;
if (variant_element_constant_value.getType() != Field::Types::String)
return;
subcolumn_name = variant_element_constant_value.get<const String &>();
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapContains" && column_type.isMap())
{
const auto & data_type_map = assert_cast<const DataTypeMap &>(*column.type);
/// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
column.name += ".keys";
column.type = std::make_shared<DataTypeArray>(data_type_map.getKeyType());
auto has_function_argument = std::make_shared<ColumnNode>(column, column_source);
function_arguments_nodes[0] = std::move(has_function_argument);
resolveOrdinaryFunctionNode(*function_node, "has");
}
}
}
private:
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
} }
}; };
@ -226,8 +438,15 @@ private:
void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{ {
FunctionToSubcolumnsVisitor visitor(context); FunctionToSubcolumnsVisitorFirstPass first_visitor(context);
visitor.visit(query_tree_node); first_visitor.visit(query_tree_node);
auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize();
if (identifiers_to_optimize.empty())
return;
FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize));
second_visitor.visit(query_tree_node);
} }
} }

View File

@ -6,6 +6,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h> #include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h> #include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
@ -47,25 +48,17 @@ public:
if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull())
{ {
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear(); function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
} }
else if (function_node->getFunctionName() == "sum" && else if (function_node->getFunctionName() == "sum" &&
first_argument_constant_literal.getType() == Field::Types::UInt64 && first_argument_constant_literal.getType() == Field::Types::UInt64 &&
first_argument_constant_literal.get<UInt64>() == 1) first_argument_constant_literal.get<UInt64>() == 1)
{ {
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear(); function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
} }
} }
private:
/// Re-resolves the node as the `count` aggregate function, requested from the factory
/// with empty argument types and empty parameters.
static void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
    AggregateFunctionProperties count_properties;
    function_node.resolveAsAggregateFunction(
        AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, count_properties));
}
}; };
} }

View File

@ -5,6 +5,7 @@
#include <Analyzer/ColumnNode.h> #include <Analyzer/ColumnNode.h>
#include <Analyzer/ConstantNode.h> #include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Analyzer/InDepthQueryTreeVisitor.h> #include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Common/DateLUT.h> #include <Common/DateLUT.h>
#include <Common/DateLUTImpl.h> #include <Common/DateLUTImpl.h>

View File

@ -74,8 +74,7 @@ public:
new_arguments[1] = std::move(if_arguments_nodes[0]); new_arguments[1] = std::move(if_arguments_nodes[0]);
function_arguments_nodes = std::move(new_arguments); function_arguments_nodes = std::move(new_arguments);
resolveAsAggregateFunctionWithIf( resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If");
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
} }
} }
else if (first_const_node) else if (first_const_node)
@ -104,27 +103,10 @@ public:
new_arguments[1] = std::move(not_function); new_arguments[1] = std::move(not_function);
function_arguments_nodes = std::move(new_arguments); function_arguments_nodes = std::move(new_arguments);
resolveAsAggregateFunctionWithIf( resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If");
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
} }
} }
} }
private:
/// Re-resolves the node as the aggregate function named `<current function name>If`,
/// using the given argument types and the node's current nulls action and parameters.
static void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types)
{
    /// NOTE(review): this value is never read below; the call is kept unchanged — confirm whether it is needed.
    auto result_type = function_node.getResultType();

    const auto name_with_if_suffix = function_node.getFunctionName() + "If";

    AggregateFunctionProperties function_properties;
    auto function_with_if = AggregateFunctionFactory::instance().get(
        name_with_if_suffix,
        function_node.getNullsAction(),
        argument_types,
        function_node.getAggregateFunction()->getParameters(),
        function_properties);

    function_node.resolveAsAggregateFunction(std::move(function_with_if));
}
}; };
} }

View File

@ -73,23 +73,24 @@ public:
const auto lhs = std::make_shared<FunctionNode>("sum"); const auto lhs = std::make_shared<FunctionNode>("sum");
lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]);
resolveAsAggregateFunctionNode(*lhs, column_type); resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName());
const auto rhs_count = std::make_shared<FunctionNode>("count"); const auto rhs_count = std::make_shared<FunctionNode>("count");
rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]);
resolveAsAggregateFunctionNode(*rhs_count, column_type); resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName());
const auto rhs = std::make_shared<FunctionNode>("multiply"); const auto rhs = std::make_shared<FunctionNode>("multiply");
rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]);
rhs->getArguments().getNodes().push_back(rhs_count); rhs->getArguments().getNodes().push_back(rhs_count);
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext());
auto new_node = std::make_shared<FunctionNode>(Poco::toLower(func_plus_minus_node->getFunctionName())); auto new_node = std::make_shared<FunctionNode>(Poco::toLower(func_plus_minus_node->getFunctionName()));
if (column_id == 0) if (column_id == 0)
new_node->getArguments().getNodes() = {lhs, rhs}; new_node->getArguments().getNodes() = {lhs, rhs};
else if (column_id == 1) else if (column_id == 1)
new_node->getArguments().getNodes() = {rhs, lhs}; new_node->getArguments().getNodes() = {rhs, lhs};
resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName());
resolveOrdinaryFunctionNodeByName(*new_node, new_node->getFunctionName(), getContext());
if (!new_node) if (!new_node)
return; return;
@ -100,28 +101,7 @@ public:
res = createCastFunction(res, function_node->getResultType(), getContext()); res = createCastFunction(res, function_node->getResultType(), getContext());
node = std::move(res); node = std::move(res);
} }
private:
/// Resolves the node as the ordinary function `function_name`, built from the node's current argument columns.
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
    const auto resolver = FunctionFactory::instance().get(function_name, getContext());
    auto built_function = resolver->build(function_node.getArgumentColumns());
    function_node.resolveAsFunction(std::move(built_function));
}
/// Resolves the node as the aggregate function with the node's own name,
/// taking a single argument of type `argument_type` and no parameters.
static void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type)
{
    AggregateFunctionProperties function_properties;
    function_node.resolveAsAggregateFunction(AggregateFunctionFactory::instance().get(
        function_node.getFunctionName(), NullsAction::EMPTY, {argument_type}, {}, function_properties));
}
}; };
} }

View File

@ -5,6 +5,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h> #include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h> #include <AggregateFunctions/IAggregateFunction.h>
#include <Analyzer/Utils.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
@ -65,7 +66,8 @@ public:
auto multiplier_node = function_node_arguments_nodes[0]; auto multiplier_node = function_node_arguments_nodes[0];
function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]); function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]);
function_node_arguments_nodes.resize(1); function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
resolveAggregateFunctionNodeByName(*function_node, "countIf");
if (constant_value_literal.get<UInt64>() != 1) if (constant_value_literal.get<UInt64>() != 1)
{ {
@ -115,7 +117,7 @@ public:
function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0];
function_node_arguments_nodes.resize(1); function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); resolveAggregateFunctionNodeByName(*function_node, "countIf");
if (if_true_condition_value != 1) if (if_true_condition_value != 1)
{ {
@ -144,7 +146,7 @@ public:
function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes[0] = std::move(not_function);
function_node_arguments_nodes.resize(1); function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); resolveAggregateFunctionNodeByName(*function_node, "countIf");
if (if_false_condition_value != 1) if (if_false_condition_value != 1)
{ {
@ -156,15 +158,6 @@ public:
} }
private: private:
/// Re-resolves the node as the `countIf` aggregate function with a single argument of type
/// `argument_type`, reusing the parameters of the node's current aggregate function.
static void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
{
    AggregateFunctionProperties count_if_properties;
    auto count_if_function = AggregateFunctionFactory::instance().get(
        "countIf",
        NullsAction::EMPTY,
        {argument_type},
        function_node.getAggregateFunction()->getParameters(),
        count_if_properties);

    function_node.resolveAsAggregateFunction(std::move(count_if_function));
}
QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right)
{ {
auto multiply_function_node = std::make_shared<FunctionNode>("multiply"); auto multiply_function_node = std::make_shared<FunctionNode>("multiply");

View File

@ -7,6 +7,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h> #include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB namespace DB

View File

@ -7,6 +7,7 @@
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h> #include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h> #include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB namespace DB
{ {
@ -184,11 +185,8 @@ public:
/// Replace uniq of initial query to count /// Replace uniq of initial query to count
if (match_subquery_with_distinct() || match_subquery_with_group_by()) if (match_subquery_with_distinct() || match_subquery_with_group_by())
{ {
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node->getArguments().getNodes().clear(); function_node->getArguments().getNodes().clear();
function_node->resolveAsAggregateFunction(std::move(aggregate_function)); resolveAggregateFunctionNodeByName(*function_node, "count");
} }
} }
}; };

View File

@ -636,16 +636,16 @@ private:
bool has_function = false; bool has_function = false;
}; };
inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_node) inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode & function_node, const String & function_name)
{ {
Array parameters; Array parameters;
for (const auto & param : function_node->getParameters()) for (const auto & param : function_node.getParameters())
{ {
auto * constant = param->as<ConstantNode>(); auto * constant = param->as<ConstantNode>();
parameters.push_back(constant->getValue()); parameters.push_back(constant->getValue());
} }
const auto & function_node_argument_nodes = function_node->getArguments().getNodes(); const auto & function_node_argument_nodes = function_node.getArguments().getNodes();
DataTypes argument_types; DataTypes argument_types;
argument_types.reserve(function_node_argument_nodes.size()); argument_types.reserve(function_node_argument_nodes.size());
@ -655,7 +655,7 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY; auto action = NullsAction::EMPTY;
return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties); return AggregateFunctionFactory::instance().get(function_name, action, argument_types, parameters, properties);
} }
} }
@ -736,11 +736,11 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context)
{ {
if (name == "nothing" || name == "nothingUInt64" || name == "nothingNull") if (name == "nothing" || name == "nothingUInt64" || name == "nothingNull")
return; return;
function_node->resolveAsAggregateFunction(resolveAggregateFunction(function_node)); function_node->resolveAsAggregateFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName()));
} }
else if (function_node->isWindowFunction()) else if (function_node->isWindowFunction())
{ {
function_node->resolveAsWindowFunction(resolveAggregateFunction(function_node)); function_node->resolveAsWindowFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName()));
} }
} }
@ -793,6 +793,18 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty
return function_node; return function_node;
} }
/// Looks up the ordinary function `function_name` in the function factory, builds it
/// from the node's current argument columns and resolves the node with the result.
void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context)
{
    auto resolver = FunctionFactory::instance().get(function_name, context);
    auto built_function = resolver->build(function_node.getArgumentColumns());
    function_node.resolveAsFunction(std::move(built_function));
}
/// Resolves the node as the aggregate function `function_name`;
/// the function itself is obtained via `resolveAggregateFunction` for this node.
void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name)
{
    function_node.resolveAsAggregateFunction(resolveAggregateFunction(function_node, function_name));
}
/** Returns: /** Returns:
* {_, false} - multiple sources * {_, false} - multiple sources
* {nullptr, true} - no sources (for constants) * {nullptr, true} - no sources (for constants)

View File

@ -112,6 +112,14 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node);
/// Wrap node into `_CAST` function /// Wrap node into `_CAST` function
QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context); QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context);
/// Resolves function node as ordinary function with given name.
/// Arguments and parameters are taken from the node.
void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context);
/// Resolves function node as aggregate function with given name.
/// Arguments and parameters are taken from the node.
void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name);
/// Checks that node has only one source and returns it /// Checks that node has only one source and returns it
QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node); QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node);

View File

@ -210,6 +210,7 @@ add_object_library(clickhouse_analyzer_passes Analyzer/Resolve)
add_object_library(clickhouse_planner Planner) add_object_library(clickhouse_planner Planner)
add_object_library(clickhouse_interpreters Interpreters) add_object_library(clickhouse_interpreters Interpreters)
add_object_library(clickhouse_interpreters_cache Interpreters/Cache) add_object_library(clickhouse_interpreters_cache Interpreters/Cache)
add_object_library(clickhouse_interpreters_hash_join Interpreters/HashJoin)
add_object_library(clickhouse_interpreters_access Interpreters/Access) add_object_library(clickhouse_interpreters_access Interpreters/Access)
add_object_library(clickhouse_interpreters_mysql Interpreters/MySQL) add_object_library(clickhouse_interpreters_mysql Interpreters/MySQL)
add_object_library(clickhouse_interpreters_clusterproxy Interpreters/ClusterProxy) add_object_library(clickhouse_interpreters_clusterproxy Interpreters/ClusterProxy)

View File

@ -1206,11 +1206,8 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
if (local_format_error) if (local_format_error)
std::rethrow_exception(local_format_error); std::rethrow_exception(local_format_error);
if (cancelled && is_interactive) if (cancelled && is_interactive && !cancelled_printed.exchange(true))
{
output_stream << "Query was cancelled." << std::endl; output_stream << "Query was cancelled." << std::endl;
cancelled_printed = true;
}
} }
@ -1326,7 +1323,7 @@ void ClientBase::onEndOfStream()
if (is_interactive) if (is_interactive)
{ {
if (cancelled && !cancelled_printed) if (cancelled && !cancelled_printed.exchange(true))
output_stream << "Query was cancelled." << std::endl; output_stream << "Query was cancelled." << std::endl;
else if (!written_first_block) else if (!written_first_block)
output_stream << "Ok." << std::endl; output_stream << "Ok." << std::endl;

View File

@ -338,8 +338,8 @@ protected:
bool allow_repeated_settings = false; bool allow_repeated_settings = false;
bool allow_merge_tree_settings = false; bool allow_merge_tree_settings = false;
bool cancelled = false; std::atomic_bool cancelled = false;
bool cancelled_printed = false; std::atomic_bool cancelled_printed = false;
/// Unpacked descriptors and streams for the ease of use. /// Unpacked descriptors and streams for the ease of use.
int in_fd = STDIN_FILENO; int in_fd = STDIN_FILENO;

View File

@ -1,8 +1,6 @@
#pragma once #pragma once
#include <deque> #include <deque>
#include <type_traits>
#include <atomic>
#include <condition_variable> #include <condition_variable>
#include <mutex> #include <mutex>
#include <optional> #include <optional>
@ -200,22 +198,18 @@ public:
*/ */
bool finish() bool finish()
{ {
bool was_finished_before = false;
{ {
std::lock_guard lock(queue_mutex); std::lock_guard lock(queue_mutex);
if (is_finished) if (is_finished)
return true; return true;
was_finished_before = is_finished;
is_finished = true; is_finished = true;
} }
pop_condition.notify_all(); pop_condition.notify_all();
push_condition.notify_all(); push_condition.notify_all();
return false;
return was_finished_before;
} }
/// Returns if queue is finished /// Returns if queue is finished

View File

@ -447,14 +447,18 @@ The server successfully detected this situation and will download merged part fr
M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \
\ \
M(AzureGetObject, "Number of Azure API GetObject calls.") \ M(AzureGetObject, "Number of Azure API GetObject calls.") \
M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \ M(AzureUpload, "Number of Azure blob storage API Upload calls") \
M(AzureStageBlock, "Number of Azure blob storage API StageBlock calls") \
M(AzureCommitBlockList, "Number of Azure blob storage API CommitBlockList calls") \
M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \
M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \
M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \
M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \ M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \
\ \
M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \
M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ M(DiskAzureUpload, "Number of Disk Azure blob storage API Upload calls") \
M(DiskAzureStageBlock, "Number of Disk Azure blob storage API StageBlock calls") \
M(DiskAzureCommitBlockList, "Number of Disk Azure blob storage API CommitBlockList calls") \
M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \
M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \
M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \
@ -611,6 +615,13 @@ The server successfully detected this situation and will download merged part fr
M(KeeperPacketsReceived, "Packets received by keeper server") \ M(KeeperPacketsReceived, "Packets received by keeper server") \
M(KeeperRequestTotal, "Total requests number on keeper server") \ M(KeeperRequestTotal, "Total requests number on keeper server") \
M(KeeperLatency, "Keeper latency") \ M(KeeperLatency, "Keeper latency") \
M(KeeperTotalElapsedMicroseconds, "Keeper total latency for a single request") \
M(KeeperProcessElapsedMicroseconds, "Keeper commit latency for a single request") \
M(KeeperPreprocessElapsedMicroseconds, "Keeper preprocessing latency for a single request") \
M(KeeperStorageLockWaitMicroseconds, "Time spent waiting for acquiring Keeper storage lock") \
M(KeeperCommitWaitElapsedMicroseconds, "Time spent waiting for certain log to be committed") \
M(KeeperBatchMaxCount, "Number of times the size of batch was limited by the amount") \
M(KeeperBatchMaxTotalSize, "Number of times the size of batch was limited by the total bytes size") \
M(KeeperCommits, "Number of successful commits") \ M(KeeperCommits, "Number of successful commits") \
M(KeeperCommitsFailed, "Number of failed commits") \ M(KeeperCommitsFailed, "Number of failed commits") \
M(KeeperSnapshotCreations, "Number of snapshots creations")\ M(KeeperSnapshotCreations, "Number of snapshots creations")\

View File

@ -9,7 +9,6 @@
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <fmt/format.h> #include <fmt/format.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <array>
namespace Coordination namespace Coordination
@ -29,7 +28,7 @@ void ZooKeeperResponse::write(WriteBuffer & out) const
Coordination::write(buf.str(), out); Coordination::write(buf.str(), out);
} }
std::string ZooKeeperRequest::toString() const std::string ZooKeeperRequest::toString(bool short_format) const
{ {
return fmt::format( return fmt::format(
"XID = {}\n" "XID = {}\n"
@ -37,7 +36,7 @@ std::string ZooKeeperRequest::toString() const
"Additional info:\n{}", "Additional info:\n{}",
xid, xid,
getOpNum(), getOpNum(),
toStringImpl()); toStringImpl(short_format));
} }
void ZooKeeperRequest::write(WriteBuffer & out) const void ZooKeeperRequest::write(WriteBuffer & out) const
@ -60,7 +59,7 @@ void ZooKeeperSyncRequest::readImpl(ReadBuffer & in)
Coordination::read(path, in); Coordination::read(path, in);
} }
std::string ZooKeeperSyncRequest::toStringImpl() const std::string ZooKeeperSyncRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format("path = {}", path); return fmt::format("path = {}", path);
} }
@ -91,7 +90,7 @@ void ZooKeeperReconfigRequest::readImpl(ReadBuffer & in)
Coordination::read(version, in); Coordination::read(version, in);
} }
std::string ZooKeeperReconfigRequest::toStringImpl() const std::string ZooKeeperReconfigRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format( return fmt::format(
"joining = {}\nleaving = {}\nnew_members = {}\nversion = {}", "joining = {}\nleaving = {}\nnew_members = {}\nversion = {}",
@ -145,7 +144,7 @@ void ZooKeeperAuthRequest::readImpl(ReadBuffer & in)
Coordination::read(data, in); Coordination::read(data, in);
} }
std::string ZooKeeperAuthRequest::toStringImpl() const std::string ZooKeeperAuthRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format( return fmt::format(
"type = {}\n" "type = {}\n"
@ -191,7 +190,7 @@ void ZooKeeperCreateRequest::readImpl(ReadBuffer & in)
is_sequential = true; is_sequential = true;
} }
std::string ZooKeeperCreateRequest::toStringImpl() const std::string ZooKeeperCreateRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format( return fmt::format(
"path = {}\n" "path = {}\n"
@ -218,7 +217,7 @@ void ZooKeeperRemoveRequest::writeImpl(WriteBuffer & out) const
Coordination::write(version, out); Coordination::write(version, out);
} }
std::string ZooKeeperRemoveRequest::toStringImpl() const std::string ZooKeeperRemoveRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format( return fmt::format(
"path = {}\n" "path = {}\n"
@ -245,7 +244,7 @@ void ZooKeeperExistsRequest::readImpl(ReadBuffer & in)
Coordination::read(has_watch, in); Coordination::read(has_watch, in);
} }
std::string ZooKeeperExistsRequest::toStringImpl() const std::string ZooKeeperExistsRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format("path = {}", path); return fmt::format("path = {}", path);
} }
@ -272,7 +271,7 @@ void ZooKeeperGetRequest::readImpl(ReadBuffer & in)
Coordination::read(has_watch, in); Coordination::read(has_watch, in);
} }
std::string ZooKeeperGetRequest::toStringImpl() const std::string ZooKeeperGetRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format("path = {}", path); return fmt::format("path = {}", path);
} }
@ -303,7 +302,7 @@ void ZooKeeperSetRequest::readImpl(ReadBuffer & in)
Coordination::read(version, in); Coordination::read(version, in);
} }
std::string ZooKeeperSetRequest::toStringImpl() const std::string ZooKeeperSetRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format( return fmt::format(
"path = {}\n" "path = {}\n"
@ -334,7 +333,7 @@ void ZooKeeperListRequest::readImpl(ReadBuffer & in)
Coordination::read(has_watch, in); Coordination::read(has_watch, in);
} }
std::string ZooKeeperListRequest::toStringImpl() const std::string ZooKeeperListRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format("path = {}", path); return fmt::format("path = {}", path);
} }
@ -356,7 +355,7 @@ void ZooKeeperFilteredListRequest::readImpl(ReadBuffer & in)
list_request_type = static_cast<ListRequestType>(read_request_type); list_request_type = static_cast<ListRequestType>(read_request_type);
} }
std::string ZooKeeperFilteredListRequest::toStringImpl() const std::string ZooKeeperFilteredListRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format( return fmt::format(
"path = {}\n" "path = {}\n"
@ -401,7 +400,7 @@ void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in)
Coordination::read(version, in); Coordination::read(version, in);
} }
std::string ZooKeeperSetACLRequest::toStringImpl() const std::string ZooKeeperSetACLRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format("path = {}\nversion = {}", path, version); return fmt::format("path = {}\nversion = {}", path, version);
} }
@ -426,7 +425,7 @@ void ZooKeeperGetACLRequest::writeImpl(WriteBuffer & out) const
Coordination::write(path, out); Coordination::write(path, out);
} }
std::string ZooKeeperGetACLRequest::toStringImpl() const std::string ZooKeeperGetACLRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format("path = {}", path); return fmt::format("path = {}", path);
} }
@ -455,7 +454,7 @@ void ZooKeeperCheckRequest::readImpl(ReadBuffer & in)
Coordination::read(version, in); Coordination::read(version, in);
} }
std::string ZooKeeperCheckRequest::toStringImpl() const std::string ZooKeeperCheckRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format("path = {}\nversion = {}", path, version); return fmt::format("path = {}\nversion = {}", path, version);
} }
@ -600,8 +599,11 @@ void ZooKeeperMultiRequest::readImpl(ReadBuffer & in)
} }
} }
std::string ZooKeeperMultiRequest::toStringImpl() const std::string ZooKeeperMultiRequest::toStringImpl(bool short_format) const
{ {
if (short_format)
return fmt::format("Subrequests size = {}", requests.size());
auto out = fmt::memory_buffer(); auto out = fmt::memory_buffer();
for (const auto & request : requests) for (const auto & request : requests)
{ {

View File

@ -63,12 +63,12 @@ struct ZooKeeperRequest : virtual Request
/// Writes length, xid, op_num, then the rest. /// Writes length, xid, op_num, then the rest.
void write(WriteBuffer & out) const; void write(WriteBuffer & out) const;
std::string toString() const; std::string toString(bool short_format = false) const;
virtual void writeImpl(WriteBuffer &) const = 0; virtual void writeImpl(WriteBuffer &) const = 0;
virtual void readImpl(ReadBuffer &) = 0; virtual void readImpl(ReadBuffer &) = 0;
virtual std::string toStringImpl() const { return ""; } virtual std::string toStringImpl(bool /*short_format*/) const { return ""; }
static std::shared_ptr<ZooKeeperRequest> read(ReadBuffer & in); static std::shared_ptr<ZooKeeperRequest> read(ReadBuffer & in);
@ -98,7 +98,7 @@ struct ZooKeeperSyncRequest final : ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Sync; } OpNum getOpNum() const override { return OpNum::Sync; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; } bool isReadRequest() const override { return false; }
@ -123,7 +123,7 @@ struct ZooKeeperReconfigRequest final : ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Reconfig; } OpNum getOpNum() const override { return OpNum::Reconfig; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; } bool isReadRequest() const override { return false; }
@ -176,7 +176,7 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Auth; } OpNum getOpNum() const override { return OpNum::Auth; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; } bool isReadRequest() const override { return false; }
@ -229,7 +229,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest
OpNum getOpNum() const override { return not_exists ? OpNum::CreateIfNotExists : OpNum::Create; } OpNum getOpNum() const override { return not_exists ? OpNum::CreateIfNotExists : OpNum::Create; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; } bool isReadRequest() const override { return false; }
@ -266,7 +266,7 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Remove; } OpNum getOpNum() const override { return OpNum::Remove; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; } bool isReadRequest() const override { return false; }
@ -293,7 +293,7 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Exists; } OpNum getOpNum() const override { return OpNum::Exists; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; } bool isReadRequest() const override { return true; }
@ -320,7 +320,7 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Get; } OpNum getOpNum() const override { return OpNum::Get; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; } bool isReadRequest() const override { return true; }
@ -347,7 +347,7 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Set; } OpNum getOpNum() const override { return OpNum::Set; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; } bool isReadRequest() const override { return false; }
@ -375,7 +375,7 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::List; } OpNum getOpNum() const override { return OpNum::List; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; } bool isReadRequest() const override { return true; }
@ -395,7 +395,7 @@ struct ZooKeeperFilteredListRequest final : ZooKeeperListRequest
OpNum getOpNum() const override { return OpNum::FilteredList; } OpNum getOpNum() const override { return OpNum::FilteredList; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
size_t bytesSize() const override { return ZooKeeperListRequest::bytesSize() + sizeof(list_request_type); } size_t bytesSize() const override { return ZooKeeperListRequest::bytesSize() + sizeof(list_request_type); }
}; };
@ -428,7 +428,7 @@ struct ZooKeeperCheckRequest : CheckRequest, ZooKeeperRequest
OpNum getOpNum() const override { return not_exists ? OpNum::CheckNotExists : OpNum::Check; } OpNum getOpNum() const override { return not_exists ? OpNum::CheckNotExists : OpNum::Check; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; } bool isReadRequest() const override { return true; }
@ -469,7 +469,7 @@ struct ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::SetACL; } OpNum getOpNum() const override { return OpNum::SetACL; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; } bool isReadRequest() const override { return false; }
@ -490,7 +490,7 @@ struct ZooKeeperGetACLRequest final : GetACLRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::GetACL; } OpNum getOpNum() const override { return OpNum::GetACL; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; } bool isReadRequest() const override { return true; }
@ -516,7 +516,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl() const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override; bool isReadRequest() const override;

View File

@ -169,6 +169,23 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const
writeText("async_replication=", buf); writeText("async_replication=", buf);
write_bool(coordination_settings->async_replication); write_bool(coordination_settings->async_replication);
writeText("latest_logs_cache_size_threshold=", buf);
write_int(coordination_settings->latest_logs_cache_size_threshold);
writeText("commit_logs_cache_size_threshold=", buf);
write_int(coordination_settings->commit_logs_cache_size_threshold);
writeText("disk_move_retries_wait_ms=", buf);
write_int(coordination_settings->disk_move_retries_wait_ms);
writeText("disk_move_retries_during_init=", buf);
write_int(coordination_settings->disk_move_retries_during_init);
writeText("log_slow_total_threshold_ms=", buf);
write_int(coordination_settings->log_slow_total_threshold_ms);
writeText("log_slow_cpu_threshold_ms=", buf);
write_int(coordination_settings->log_slow_cpu_threshold_ms);
writeText("log_slow_connection_operation_threshold_ms=", buf);
write_int(coordination_settings->log_slow_connection_operation_threshold_ms);
} }
KeeperConfigurationAndSettingsPtr KeeperConfigurationAndSettingsPtr

View File

@ -58,7 +58,10 @@ struct Settings;
M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \ M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \
M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \ M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \
M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \
M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \
M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \
M(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \
M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0)
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

View File

@ -150,12 +150,18 @@
M(S3PutObject) \ M(S3PutObject) \
M(S3GetObject) \ M(S3GetObject) \
\ \
M(AzureUploadPart) \ M(AzureUpload) \
M(DiskAzureUploadPart) \ M(DiskAzureUpload) \
M(AzureStageBlock) \
M(DiskAzureStageBlock) \
M(AzureCommitBlockList) \
M(DiskAzureCommitBlockList) \
M(AzureCopyObject) \ M(AzureCopyObject) \
M(DiskAzureCopyObject) \ M(DiskAzureCopyObject) \
M(AzureDeleteObjects) \ M(AzureDeleteObjects) \
M(DiskAzureDeleteObjects) \
M(AzureListObjects) \ M(AzureListObjects) \
M(DiskAzureListObjects) \
\ \
M(DiskS3DeleteObjects) \ M(DiskS3DeleteObjects) \
M(DiskS3CopyObject) \ M(DiskS3CopyObject) \
@ -238,6 +244,13 @@
M(KeeperPacketsReceived) \ M(KeeperPacketsReceived) \
M(KeeperRequestTotal) \ M(KeeperRequestTotal) \
M(KeeperLatency) \ M(KeeperLatency) \
M(KeeperTotalElapsedMicroseconds) \
M(KeeperProcessElapsedMicroseconds) \
M(KeeperPreprocessElapsedMicroseconds) \
M(KeeperStorageLockWaitMicroseconds) \
M(KeeperCommitWaitElapsedMicroseconds) \
M(KeeperBatchMaxCount) \
M(KeeperBatchMaxTotalSize) \
M(KeeperCommits) \ M(KeeperCommits) \
M(KeeperCommitsFailed) \ M(KeeperCommitsFailed) \
M(KeeperSnapshotCreations) \ M(KeeperSnapshotCreations) \

View File

@ -31,6 +31,13 @@ namespace CurrentMetrics
extern const Metric KeeperOutstandingRequets; extern const Metric KeeperOutstandingRequets;
} }
/// Profile events incremented by KeeperDispatcher::requestThread().
namespace ProfileEvents
{
/// Accumulates the time (in microseconds) spent in the scope that force-waits for the
/// previous write batch when read or reconfig requests have to be executed after it.
extern const Event KeeperCommitWaitElapsedMicroseconds;
/// Incremented by one when a non-empty write batch contains exactly `max_requests_batch_size` requests.
extern const Event KeeperBatchMaxCount;
/// Incremented by one when the byte size of a non-empty write batch is exactly `max_requests_batch_bytes_size`.
extern const Event KeeperBatchMaxTotalSize;
}
using namespace std::chrono_literals; using namespace std::chrono_literals;
namespace DB namespace DB
@ -119,6 +126,7 @@ void KeeperDispatcher::requestThread()
auto coordination_settings = configuration_and_settings->coordination_settings; auto coordination_settings = configuration_and_settings->coordination_settings;
uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds(); uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size; uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size;
size_t max_batch_size = coordination_settings->max_requests_batch_size;
/// The code below do a very simple thing: batch all write (quorum) requests into vector until /// The code below do a very simple thing: batch all write (quorum) requests into vector until
/// previous write batch is not finished or max_batch size achieved. The main complexity goes from /// previous write batch is not finished or max_batch size achieved. The main complexity goes from
@ -188,7 +196,6 @@ void KeeperDispatcher::requestThread()
return false; return false;
}; };
size_t max_batch_size = coordination_settings->max_requests_batch_size;
while (!shutdown_called && current_batch.size() < max_batch_size && !has_reconfig_request while (!shutdown_called && current_batch.size() < max_batch_size && !has_reconfig_request
&& current_batch_bytes_size < max_batch_bytes_size && try_get_request()) && current_batch_bytes_size < max_batch_bytes_size && try_get_request())
; ;
@ -225,6 +232,12 @@ void KeeperDispatcher::requestThread()
/// Process collected write requests batch /// Process collected write requests batch
if (!current_batch.empty()) if (!current_batch.empty())
{ {
if (current_batch.size() == max_batch_size)
ProfileEvents::increment(ProfileEvents::KeeperBatchMaxCount, 1);
if (current_batch_bytes_size == max_batch_bytes_size)
ProfileEvents::increment(ProfileEvents::KeeperBatchMaxTotalSize, 1);
LOG_TRACE(log, "Processing requests batch, size: {}, bytes: {}", current_batch.size(), current_batch_bytes_size); LOG_TRACE(log, "Processing requests batch, size: {}, bytes: {}", current_batch.size(), current_batch_bytes_size);
auto result = server->putRequestBatch(current_batch); auto result = server->putRequestBatch(current_batch);
@ -243,6 +256,8 @@ void KeeperDispatcher::requestThread()
/// If we will execute read or reconfig next, we have to process result now /// If we will execute read or reconfig next, we have to process result now
if (execute_requests_after_write) if (execute_requests_after_write)
{ {
Stopwatch watch;
SCOPE_EXIT(ProfileEvents::increment(ProfileEvents::KeeperCommitWaitElapsedMicroseconds, watch.elapsedMicroseconds()));
if (prev_result) if (prev_result)
result_buf = forceWaitAndProcessResult( result_buf = forceWaitAndProcessResult(
prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write); prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write);
@ -319,19 +334,13 @@ void KeeperDispatcher::snapshotThread()
{ {
setThreadName("KeeperSnpT"); setThreadName("KeeperSnpT");
const auto & shutdown_called = keeper_context->isShutdownCalled(); const auto & shutdown_called = keeper_context->isShutdownCalled();
while (!shutdown_called) CreateSnapshotTask task;
while (snapshots_queue.pop(task))
{ {
CreateSnapshotTask task;
if (!snapshots_queue.pop(task))
break;
try try
{ {
auto snapshot_file_info = task.create_snapshot(std::move(task.snapshot), /*execute_only_cleanup=*/shutdown_called); auto snapshot_file_info = task.create_snapshot(std::move(task.snapshot), /*execute_only_cleanup=*/shutdown_called);
if (shutdown_called)
break;
if (!snapshot_file_info) if (!snapshot_file_info)
continue; continue;

View File

@ -1,12 +1,14 @@
#include <atomic> #include <atomic>
#include <cerrno> #include <cerrno>
#include <chrono>
#include <Coordination/KeeperDispatcher.h>
#include <Coordination/KeeperReconfiguration.h>
#include <Coordination/KeeperSnapshotManager.h> #include <Coordination/KeeperSnapshotManager.h>
#include <Coordination/KeeperStateMachine.h> #include <Coordination/KeeperStateMachine.h>
#include <Coordination/KeeperDispatcher.h>
#include <Coordination/KeeperStorage.h> #include <Coordination/KeeperStorage.h>
#include <Coordination/KeeperReconfiguration.h>
#include <Coordination/ReadBufferFromNuraftBuffer.h> #include <Coordination/ReadBufferFromNuraftBuffer.h>
#include <Coordination/WriteBufferFromNuraftBuffer.h> #include <Coordination/WriteBufferFromNuraftBuffer.h>
#include <Disks/DiskLocal.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <base/defines.h> #include <base/defines.h>
#include <base/errnoToString.h> #include <base/errnoToString.h>
@ -17,7 +19,6 @@
#include <Common/ZooKeeper/ZooKeeperCommon.h> #include <Common/ZooKeeper/ZooKeeperCommon.h>
#include <Common/ZooKeeper/ZooKeeperIO.h> #include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <Disks/DiskLocal.h>
namespace ProfileEvents namespace ProfileEvents
@ -31,6 +32,7 @@ namespace ProfileEvents
extern const Event KeeperSnapshotApplysFailed; extern const Event KeeperSnapshotApplysFailed;
extern const Event KeeperReadSnapshot; extern const Event KeeperReadSnapshot;
extern const Event KeeperSaveSnapshot; extern const Event KeeperSaveSnapshot;
extern const Event KeeperStorageLockWaitMicroseconds;
} }
namespace DB namespace DB
@ -151,6 +153,20 @@ void assertDigest(
} }
} }
/// Scoped holder for a std::mutex that also reports how long the acquisition took.
/// The time spent blocked in the constructor is added to the
/// KeeperStorageLockWaitMicroseconds profile event; the mutex is released when the
/// holder (and therefore its `lock` member) is destroyed.
struct TSA_SCOPED_LOCKABLE LockGuardWithStats final
{
    std::unique_lock<std::mutex> lock;

    explicit LockGuardWithStats(std::mutex & mutex) TSA_ACQUIRE(mutex)
    {
        /// The timer is started before blocking on the mutex and read right after the
        /// mutex has been acquired, so only the wait itself is measured.
        Stopwatch wait_timer;
        lock = std::unique_lock<std::mutex>(mutex);
        ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, wait_timer.elapsedMicroseconds());
    }

    ~LockGuardWithStats() TSA_RELEASE() = default;
};
} }
nuraft::ptr<nuraft::buffer> KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) nuraft::ptr<nuraft::buffer> KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data)
@ -272,7 +288,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig) if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig)
return true; return true;
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
if (storage->isFinalized()) if (storage->isFinalized())
return false; return false;
@ -302,7 +318,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session) void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session)
{ {
std::lock_guard _(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
KeeperStorage::ResponseForSession response = processReconfiguration(request_for_session); KeeperStorage::ResponseForSession response = processReconfiguration(request_for_session);
if (!responses_queue.push(response)) if (!responses_queue.push(response))
{ {
@ -391,7 +407,7 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session)) if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session))
return nullptr; return nullptr;
auto try_push = [this](const KeeperStorage::ResponseForSession& response) auto try_push = [&](const KeeperStorage::ResponseForSession& response)
{ {
if (!responses_queue.push(response)) if (!responses_queue.push(response))
{ {
@ -400,6 +416,17 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
"Failed to push response with session id {} to the queue, probably because of shutdown", "Failed to push response with session id {} to the queue, probably because of shutdown",
response.session_id); response.session_id);
} }
using namespace std::chrono;
uint64_t elapsed = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count() - request_for_session->time;
if (elapsed > keeper_context->getCoordinationSettings()->log_slow_total_threshold_ms)
{
LOG_INFO(
log,
"Total time to process a request took too long ({}ms).\nRequest info: {}",
elapsed,
request_for_session->request->toString(/*short_format=*/true));
}
}; };
try try
@ -417,7 +444,7 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
response_for_session.session_id = -1; response_for_session.session_id = -1;
response_for_session.response = response; response_for_session.response = response;
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
session_id = storage->getSessionID(session_id_request.session_timeout_ms); session_id = storage->getSessionID(session_id_request.session_timeout_ms);
LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms);
response->session_id = session_id; response->session_id = session_id;
@ -426,12 +453,13 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
else else
{ {
if (op_num == Coordination::OpNum::Close) if (op_num == Coordination::OpNum::Close)
{ {
std::lock_guard lock(request_cache_mutex); std::lock_guard lock(request_cache_mutex);
parsed_request_cache.erase(request_for_session->session_id); parsed_request_cache.erase(request_for_session->session_id);
} }
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
KeeperStorage::ResponsesForSessions responses_for_sessions KeeperStorage::ResponsesForSessions responses_for_sessions
= storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid);
for (auto & response_for_session : responses_for_sessions) for (auto & response_for_session : responses_for_sessions)
@ -482,7 +510,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
} }
{ /// deserialize and apply snapshot to storage { /// deserialize and apply snapshot to storage
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
SnapshotDeserializationResult snapshot_deserialization_result; SnapshotDeserializationResult snapshot_deserialization_result;
if (latest_snapshot_ptr) if (latest_snapshot_ptr)
@ -534,7 +562,7 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
return; return;
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
storage->rollbackRequest(request_for_session.zxid, allow_missing); storage->rollbackRequest(request_for_session.zxid, allow_missing);
} }
@ -561,7 +589,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf); auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf);
CreateSnapshotTask snapshot_task; CreateSnapshotTask snapshot_task;
{ /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking.
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot>(storage.get(), snapshot_meta_copy, getClusterConfig()); snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot>(storage.get(), snapshot_meta_copy, getClusterConfig());
} }
@ -569,7 +597,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup) snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup)
{ {
nuraft::ptr<std::exception> exception(nullptr); nuraft::ptr<std::exception> exception(nullptr);
bool ret = true; bool ret = false;
if (!execute_only_cleanup) if (!execute_only_cleanup)
{ {
try try
@ -599,7 +627,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
else else
{ {
auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot); auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot);
auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(
*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx());
latest_snapshot_info = std::move(snapshot_info); latest_snapshot_info = std::move(snapshot_info);
latest_snapshot_buf = std::move(snapshot_buf); latest_snapshot_buf = std::move(snapshot_buf);
} }
@ -612,18 +641,19 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
latest_snapshot_info->path); latest_snapshot_info->path);
} }
} }
ret = true;
} }
catch (...) catch (...)
{ {
ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreationsFailed); ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreationsFailed);
LOG_TRACE(log, "Exception happened during snapshot"); LOG_TRACE(log, "Exception happened during snapshot");
tryLogCurrentException(log); tryLogCurrentException(log);
ret = false;
} }
} }
{ {
/// Destroy snapshot with lock /// Destroy snapshot with lock
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
LOG_TRACE(log, "Clearing garbage after snapshot"); LOG_TRACE(log, "Clearing garbage after snapshot");
/// Turn off "snapshot mode" and clear outdate part of storage state /// Turn off "snapshot mode" and clear outdate part of storage state
storage->clearGarbageAfterSnapshot(); storage->clearGarbageAfterSnapshot();
@ -764,7 +794,7 @@ int KeeperStateMachine::read_logical_snp_obj(
void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session) void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session)
{ {
/// Pure local request, just process it with storage /// Pure local request, just process it with storage
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
auto responses = storage->processRequest( auto responses = storage->processRequest(
request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/); request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/);
for (const auto & response : responses) for (const auto & response : responses)
@ -774,97 +804,97 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi
void KeeperStateMachine::shutdownStorage() void KeeperStateMachine::shutdownStorage()
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
storage->finalize(); storage->finalize();
} }
std::vector<int64_t> KeeperStateMachine::getDeadSessions() std::vector<int64_t> KeeperStateMachine::getDeadSessions()
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getDeadSessions(); return storage->getDeadSessions();
} }
int64_t KeeperStateMachine::getNextZxid() const int64_t KeeperStateMachine::getNextZxid() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getNextZXID(); return storage->getNextZXID();
} }
KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getNodesDigest(false); return storage->getNodesDigest(false);
} }
uint64_t KeeperStateMachine::getLastProcessedZxid() const uint64_t KeeperStateMachine::getLastProcessedZxid() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getZXID(); return storage->getZXID();
} }
uint64_t KeeperStateMachine::getNodesCount() const uint64_t KeeperStateMachine::getNodesCount() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getNodesCount(); return storage->getNodesCount();
} }
uint64_t KeeperStateMachine::getTotalWatchesCount() const uint64_t KeeperStateMachine::getTotalWatchesCount() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getTotalWatchesCount(); return storage->getTotalWatchesCount();
} }
uint64_t KeeperStateMachine::getWatchedPathsCount() const uint64_t KeeperStateMachine::getWatchedPathsCount() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getWatchedPathsCount(); return storage->getWatchedPathsCount();
} }
uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getSessionsWithWatchesCount(); return storage->getSessionsWithWatchesCount();
} }
uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getTotalEphemeralNodesCount(); return storage->getTotalEphemeralNodesCount();
} }
uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getSessionWithEphemeralNodesCount(); return storage->getSessionWithEphemeralNodesCount();
} }
void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
storage->dumpWatches(buf); storage->dumpWatches(buf);
} }
void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
storage->dumpWatchesByPath(buf); storage->dumpWatchesByPath(buf);
} }
void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
storage->dumpSessionsAndEphemerals(buf); storage->dumpSessionsAndEphemerals(buf);
} }
uint64_t KeeperStateMachine::getApproximateDataSize() const uint64_t KeeperStateMachine::getApproximateDataSize() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getApproximateDataSize(); return storage->getApproximateDataSize();
} }
uint64_t KeeperStateMachine::getKeyArenaSize() const uint64_t KeeperStateMachine::getKeyArenaSize() const
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
return storage->getArenaDataSize(); return storage->getArenaDataSize();
} }
@ -905,7 +935,7 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const
void KeeperStateMachine::recalculateStorageStats() void KeeperStateMachine::recalculateStorageStats()
{ {
std::lock_guard lock(storage_and_responses_lock); LockGuardWithStats lock(storage_and_responses_lock);
LOG_INFO(log, "Recalculating storage stats"); LOG_INFO(log, "Recalculating storage stats");
storage->recalculateStats(); storage->recalculateStats();
LOG_INFO(log, "Done recalculating storage stats"); LOG_INFO(log, "Done recalculating storage stats");

View File

@ -182,8 +182,7 @@ private:
KeeperSnapshotManagerS3 * snapshot_manager_s3; KeeperSnapshotManagerS3 * snapshot_manager_s3;
KeeperStorage::ResponseForSession processReconfiguration( KeeperStorage::ResponseForSession processReconfiguration(const KeeperStorage::RequestForSession & request_for_session)
const KeeperStorage::RequestForSession& request_for_session)
TSA_REQUIRES(storage_and_responses_lock); TSA_REQUIRES(storage_and_responses_lock);
}; };
} }

View File

@ -40,6 +40,8 @@ namespace ProfileEvents
extern const Event KeeperGetRequest; extern const Event KeeperGetRequest;
extern const Event KeeperListRequest; extern const Event KeeperListRequest;
extern const Event KeeperExistsRequest; extern const Event KeeperExistsRequest;
extern const Event KeeperPreprocessElapsedMicroseconds;
extern const Event KeeperProcessElapsedMicroseconds;
} }
namespace DB namespace DB
@ -2309,6 +2311,20 @@ void KeeperStorage::preprocessRequest(
std::optional<Digest> digest, std::optional<Digest> digest,
int64_t log_idx) int64_t log_idx)
{ {
Stopwatch watch;
SCOPE_EXIT({
auto elapsed = watch.elapsedMicroseconds();
if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms)
{
LOG_INFO(
getLogger("KeeperStorage"),
"Preprocessing a request took too long ({}ms).\nRequest info: {}",
elapsed_ms,
zk_request->toString(/*short_format=*/true));
}
ProfileEvents::increment(ProfileEvents::KeeperPreprocessElapsedMicroseconds, elapsed);
});
if (!initialized) if (!initialized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized"); throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized");
@ -2409,6 +2425,20 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(
bool check_acl, bool check_acl,
bool is_local) bool is_local)
{ {
Stopwatch watch;
SCOPE_EXIT({
auto elapsed = watch.elapsedMicroseconds();
if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms)
{
LOG_INFO(
getLogger("KeeperStorage"),
"Processing a request took too long ({}ms).\nRequest info: {}",
elapsed_ms,
zk_request->toString(/*short_format=*/true));
}
ProfileEvents::increment(ProfileEvents::KeeperProcessElapsedMicroseconds, elapsed);
});
if (!initialized) if (!initialized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized"); throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized");

View File

@ -3,6 +3,7 @@
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/ArenaUtils.h> #include <Common/ArenaUtils.h>
#include <list>
namespace DB namespace DB
{ {

View File

@ -623,7 +623,7 @@ class IColumn;
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \ M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \ M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \
@ -733,6 +733,7 @@ class IColumn;
M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \
M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \
M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \
M(Bool, database_replicated_allow_heavy_create, false, "Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine. Note that it can block DDL queue for a long time.", 0) \
M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \ M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \
M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \ M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \

View File

@ -59,8 +59,10 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{ {
{"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."},
{"allow_materialized_view_with_bad_select", true, false, "Stricter validation in CREATE MATERIALIZED VIEW"}, {"allow_materialized_view_with_bad_select", true, false, "Stricter validation in CREATE MATERIALIZED VIEW"},
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."}, {"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"},
{"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."}, {"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"},
}}, }},
{"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"},
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"},

View File

@ -32,7 +32,7 @@ namespace ErrorCodes
extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int NOT_FOUND_COLUMN_IN_BLOCK;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
extern const int ILLEGAL_INDEX; extern const int ARGUMENT_OUT_OF_BOUND;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
@ -286,7 +286,7 @@ std::optional<size_t> DataTypeTuple::tryGetPositionByName(const String & name) c
String DataTypeTuple::getNameByPosition(size_t i) const String DataTypeTuple::getNameByPosition(size_t i) const
{ {
if (i == 0 || i > names.size()) if (i == 0 || i > names.size())
throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index of tuple element ({}) if out range ([1, {}])", i, names.size()); throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index of tuple element ({}) is out range ([1, {}])", i, names.size());
return names[i - 1]; return names[i - 1];
} }

View File

@ -186,7 +186,7 @@ void IDisk::checkAccess()
DB::UUID server_uuid = DB::ServerUUID::get(); DB::UUID server_uuid = DB::ServerUUID::get();
if (server_uuid == DB::UUIDHelpers::Nil) if (server_uuid == DB::UUIDHelpers::Nil)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Server UUID is not initialized"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Server UUID is not initialized");
const String path = fmt::format("clickhouse_access_check_{}", DB::toString(server_uuid)); const String path = fmt::format("clickhouse_access_check_{}", toString(server_uuid));
checkAccessImpl(path); checkAccessImpl(path);
} }

View File

@ -427,7 +427,7 @@ public:
/// Device: 10301h/66305d Inode: 3109907 Links: 1 /// Device: 10301h/66305d Inode: 3109907 Links: 1
/// Why we have always zero by default? Because normal filesystem /// Why we have always zero by default? Because normal filesystem
/// manages hardlinks by itself. So you can always remove hardlink and all /// manages hardlinks by itself. So you can always remove hardlink and all
/// other alive harlinks will not be removed. /// other alive hardlinks will not be removed.
virtual UInt32 getRefCount(const String &) const { return 0; } virtual UInt32 getRefCount(const String &) const { return 0; }
/// Revision is an incremental counter of disk operation. /// Revision is an incremental counter of disk operation.

View File

@ -14,6 +14,15 @@ namespace ProfileEvents
{ {
extern const Event RemoteWriteThrottlerBytes; extern const Event RemoteWriteThrottlerBytes;
extern const Event RemoteWriteThrottlerSleepMicroseconds; extern const Event RemoteWriteThrottlerSleepMicroseconds;
extern const Event AzureUpload;
extern const Event AzureStageBlock;
extern const Event AzureCommitBlockList;
extern const Event DiskAzureUpload;
extern const Event DiskAzureStageBlock;
extern const Event DiskAzureCommitBlockList;
} }
namespace DB namespace DB
@ -134,6 +143,10 @@ void WriteBufferFromAzureBlobStorage::preFinalize()
/// then we use single part upload instead of multi part upload /// then we use single part upload instead of multi part upload
if (block_ids.empty() && detached_part_data.size() == 1 && detached_part_data.front().data_size <= max_single_part_upload_size) if (block_ids.empty() && detached_part_data.size() == 1 && detached_part_data.front().data_size <= max_single_part_upload_size)
{ {
ProfileEvents::increment(ProfileEvents::AzureUpload);
if (blob_container_client->GetClickhouseOptions().IsClientForDisk)
ProfileEvents::increment(ProfileEvents::DiskAzureUpload);
auto part_data = std::move(detached_part_data.front()); auto part_data = std::move(detached_part_data.front());
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(part_data.memory.data()), part_data.data_size); Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(part_data.memory.data()), part_data.data_size);
@ -164,6 +177,10 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl()
if (!block_ids.empty()) if (!block_ids.empty())
{ {
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
ProfileEvents::increment(ProfileEvents::AzureCommitBlockList);
if (blob_container_client->GetClickhouseOptions().IsClientForDisk)
ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList);
execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries);
LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path);
} }
@ -269,6 +286,10 @@ void WriteBufferFromAzureBlobStorage::writePart(WriteBufferFromAzureBlobStorage:
auto & data_block_id = std::get<0>(*worker_data); auto & data_block_id = std::get<0>(*worker_data);
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
ProfileEvents::increment(ProfileEvents::AzureStageBlock);
if (blob_container_client->GetClickhouseOptions().IsClientForDisk)
ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock);
Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(std::get<1>(*worker_data).memory.data()), data_size); Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(std::get<1>(*worker_data).memory.data()), data_size);
execWithRetry([&](){ block_blob_client.StageBlock(data_block_id, memory_stream); }, max_unexpected_write_error_retries, data_size); execWithRetry([&](){ block_blob_client.StageBlock(data_block_id, memory_stream); }, max_unexpected_write_error_retries, data_size);
}; };

View File

@ -60,7 +60,6 @@ public:
"ListObjectAzure") "ListObjectAzure")
, client(client_) , client(client_)
{ {
options.Prefix = path_prefix; options.Prefix = path_prefix;
options.PageSizeHint = static_cast<int>(max_list_size); options.PageSizeHint = static_cast<int>(max_list_size);
} }
@ -150,7 +149,7 @@ ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_pr
auto settings_ptr = settings.get(); auto settings_ptr = settings.get();
auto client_ptr = client.get(); auto client_ptr = client.get();
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, max_keys); return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, max_keys ? max_keys : settings_ptr->list_object_keys_size);
} }
void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const

View File

@ -75,6 +75,7 @@ struct RelativePathWithMetadata
virtual std::string getPath() const { return relative_path; } virtual std::string getPath() const { return relative_path; }
virtual bool isArchive() const { return false; } virtual bool isArchive() const { return false; }
virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
}; };
struct ObjectKeyWithMetadata struct ObjectKeyWithMetadata

View File

@ -22,8 +22,7 @@ using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFile
/// Also it has excessive API calls. /// Also it has excessive API calls.
/// ///
/// It is used to allow BACKUP/RESTORE to ObjectStorage (S3/...) with the same /// It is used to allow BACKUP/RESTORE to ObjectStorage (S3/...) with the same
/// structure as on disk MergeTree, and does not requires metadata from local /// structure as on disk MergeTree, and does not require metadata from a local disk to restore.
/// disk to restore.
class MetadataStorageFromPlainObjectStorage : public IMetadataStorage class MetadataStorageFromPlainObjectStorage : public IMetadataStorage
{ {
public: public:

View File

@ -1,10 +1,14 @@
#include <Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h> #include <Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h>
#include <Disks/ObjectStorages/ObjectStorageIterator.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/SharedThreadPools.h>
#include <IO/S3Common.h>
#include <Common/ErrorCodes.h> #include <Common/ErrorCodes.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include "CommonPathPrefixKeyGenerator.h" #include "CommonPathPrefixKeyGenerator.h"
namespace DB namespace DB
{ {
@ -22,34 +26,78 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
{ {
MetadataStorageFromPlainObjectStorage::PathMap result; MetadataStorageFromPlainObjectStorage::PathMap result;
RelativePathsWithMetadata files; ThreadPool & pool = getIOThreadPool().get();
object_storage->listObjects(root, files, 0); ThreadPoolCallbackRunnerLocal<void> runner(pool, "PlainRWMetaLoad");
for (const auto & file : files) std::mutex mutex;
LoggerPtr log = getLogger("MetadataStorageFromPlainObjectStorage");
ReadSettings settings;
settings.enable_filesystem_cache = false;
settings.remote_fs_method = RemoteFSReadMethod::read;
settings.remote_fs_buffer_size = 1024; /// These files are small.
LOG_DEBUG(log, "Loading metadata");
size_t num_files = 0;
for (auto iterator = object_storage->iterate(root, 0); iterator->isValid(); iterator->next())
{ {
auto remote_path = std::filesystem::path(file->relative_path); ++num_files;
auto file = iterator->current();
String path = file->getPath();
auto remote_path = std::filesystem::path(path);
if (remote_path.filename() != PREFIX_PATH_FILE_NAME) if (remote_path.filename() != PREFIX_PATH_FILE_NAME)
continue; continue;
StoredObject object{file->relative_path}; runner([remote_path, path, &object_storage, &result, &mutex, &log, &settings]
{
setThreadName("PlainRWMetaLoad");
auto read_buf = object_storage->readObject(object); StoredObject object{path};
String local_path; String local_path;
readStringUntilEOF(local_path, *read_buf);
chassert(remote_path.has_parent_path()); try
auto res = result.emplace(local_path, remote_path.parent_path()); {
auto read_buf = object_storage->readObject(object, settings);
readStringUntilEOF(local_path, *read_buf);
}
#if USE_AWS_S3
catch (const S3Exception & e)
{
/// It is ok if a directory was removed just now.
/// We support attaching a filesystem that is concurrently modified by someone else.
if (e.getS3ErrorCode() == Aws::S3::S3Errors::NO_SUCH_KEY)
return;
throw;
}
#endif
catch (...)
{
throw;
}
/// This can happen if table replication is enabled, then the same local path is written chassert(remote_path.has_parent_path());
/// in `prefix.path` of each replica. std::pair<MetadataStorageFromPlainObjectStorage::PathMap::iterator, bool> res;
/// TODO: should replicated tables (e.g., RMT) be explicitly disallowed? {
if (!res.second) std::lock_guard lock(mutex);
LOG_WARNING( res = result.emplace(local_path, remote_path.parent_path());
getLogger("MetadataStorageFromPlainObjectStorage"), }
"The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'",
local_path, /// This can happen if table replication is enabled, then the same local path is written
res.first->second, /// in `prefix.path` of each replica.
remote_path.parent_path().string()); /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed?
if (!res.second)
LOG_WARNING(
log,
"The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'",
local_path,
res.first->second,
remote_path.parent_path().string());
});
} }
runner.waitForAllToFinishAndRethrowFirstError();
LOG_DEBUG(log, "Loaded metadata for {} files, found {} directories", num_files, result.size());
auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; auto metric = object_storage->getMetadataStorageMetrics().directory_map_size;
CurrentMetrics::add(metric, result.size()); CurrentMetrics::add(metric, result.size());
return result; return result;

View File

@ -4,6 +4,7 @@
#include <memory> #include <memory>
namespace DB namespace DB
{ {

View File

@ -9,15 +9,34 @@ namespace DB
class IObjectStorageIterator class IObjectStorageIterator
{ {
public: public:
/// Moves iterator to the next element. If the iterator not isValid, the behavior is undefined.
virtual void next() = 0; virtual void next() = 0;
virtual void nextBatch() = 0;
/// Check if the iterator is valid, which means the `current` method can be called.
virtual bool isValid() = 0; virtual bool isValid() = 0;
/// Return the current element.
virtual RelativePathWithMetadataPtr current() = 0; virtual RelativePathWithMetadataPtr current() = 0;
virtual RelativePathsWithMetadata currentBatch() = 0;
/// This will initiate prefetching the next batch in background, so it can be obtained faster when needed.
virtual std::optional<RelativePathsWithMetadata> getCurrentBatchAndScheduleNext() = 0; virtual std::optional<RelativePathsWithMetadata> getCurrentBatchAndScheduleNext() = 0;
/// Returns the number of elements in the batches that were fetched so far.
virtual size_t getAccumulatedSize() const = 0; virtual size_t getAccumulatedSize() const = 0;
virtual ~IObjectStorageIterator() = default; virtual ~IObjectStorageIterator() = default;
private:
/// Skips all the remaining elements in the current batch (if any),
/// and moves the iterator to the first element of the next batch,
/// or, if there is no more batches, the iterator becomes invalid.
/// If the iterator not isValid, the behavior is undefined.
virtual void nextBatch() = 0;
/// Return the current batch of elements.
/// It is unspecified how batches are formed.
/// But this method can be used for more efficient processing.
virtual RelativePathsWithMetadata currentBatch() = 0;
}; };
using ObjectStorageIteratorPtr = std::shared_ptr<IObjectStorageIterator>; using ObjectStorageIteratorPtr = std::shared_ptr<IObjectStorageIterator>;
@ -25,6 +44,7 @@ using ObjectStorageIteratorPtr = std::shared_ptr<IObjectStorageIterator>;
class ObjectStorageIteratorFromList : public IObjectStorageIterator class ObjectStorageIteratorFromList : public IObjectStorageIterator
{ {
public: public:
/// Everything is represented by just a single batch.
explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_) explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_)
: batch(std::move(batch_)) : batch(std::move(batch_))
, batch_iterator(batch.begin()) {} , batch_iterator(batch.begin()) {}

View File

@ -37,10 +37,11 @@ void IObjectStorageIteratorAsync::nextBatch()
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
if (is_finished) if (!has_next_batch)
{ {
current_batch.clear(); current_batch.clear();
current_batch_iterator = current_batch.begin(); current_batch_iterator = current_batch.begin();
is_finished = true;
return; return;
} }
@ -58,16 +59,23 @@ void IObjectStorageIteratorAsync::nextBatch()
current_batch = std::move(result.batch); current_batch = std::move(result.batch);
current_batch_iterator = current_batch.begin(); current_batch_iterator = current_batch.begin();
accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); if (current_batch.empty())
{
if (result.has_next)
outcome_future = scheduleBatch();
else
is_finished = true; is_finished = true;
has_next_batch = false;
}
else
{
accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
has_next_batch = result.has_next;
if (has_next_batch)
outcome_future = scheduleBatch();
}
} }
catch (...) catch (...)
{ {
is_finished = true; has_next_batch = false;
throw; throw;
} }
} }
@ -76,10 +84,12 @@ void IObjectStorageIteratorAsync::next()
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
if (is_finished)
return;
++current_batch_iterator;
if (current_batch_iterator == current_batch.end()) if (current_batch_iterator == current_batch.end())
nextBatch(); nextBatch();
else
++current_batch_iterator;
} }
std::future<IObjectStorageIteratorAsync::BatchAndHasNext> IObjectStorageIteratorAsync::scheduleBatch() std::future<IObjectStorageIteratorAsync::BatchAndHasNext> IObjectStorageIteratorAsync::scheduleBatch()
@ -99,7 +109,7 @@ bool IObjectStorageIteratorAsync::isValid()
if (!is_initialized) if (!is_initialized)
nextBatch(); nextBatch();
return current_batch_iterator != current_batch.end(); return !is_finished;
} }
RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current() RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current()

View File

@ -35,7 +35,7 @@ public:
void deactivate(); void deactivate();
protected: protected:
/// This method fetches the next batch, and returns true if there are more batches after it.
virtual bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) = 0; virtual bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) = 0;
struct BatchAndHasNext struct BatchAndHasNext
@ -48,6 +48,7 @@ protected:
bool is_initialized{false}; bool is_initialized{false};
bool is_finished{false}; bool is_finished{false};
bool has_next_batch{true};
bool deactivated{false}; bool deactivated{false};
mutable std::recursive_mutex mutex; mutable std::recursive_mutex mutex;

View File

@ -293,6 +293,8 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
{ {
auto settings_ptr = s3_settings.get(); auto settings_ptr = s3_settings.get();
if (!max_keys)
max_keys = settings_ptr->list_object_keys_size;
return std::make_shared<S3IteratorAsync>(uri.bucket, path_prefix, client.get(), max_keys); return std::make_shared<S3IteratorAsync>(uri.bucket, path_prefix, client.get(), max_keys);
} }

View File

@ -67,7 +67,7 @@ private:
} }
public: public:
template <class ...Args> template <typename... Args>
explicit S3ObjectStorage(std::unique_ptr<S3::Client> && client_, Args && ...args) explicit S3ObjectStorage(std::unique_ptr<S3::Client> && client_, Args && ...args)
: S3ObjectStorage("S3ObjectStorage", std::move(client_), std::forward<Args>(args)...) : S3ObjectStorage("S3ObjectStorage", std::move(client_), std::forward<Args>(args)...)
{ {

View File

@ -202,7 +202,7 @@ public:
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"} {"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}
}; };
validateFunctionArgumentTypes(*this, arguments, mandatory_arguments); validateFunctionArguments(*this, arguments, mandatory_arguments);
return std::make_shared<DataTypeString>(); return std::make_shared<DataTypeString>();
} }

View File

@ -16,6 +16,7 @@ namespace ErrorCodes
{ {
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int PARAMETER_OUT_OF_BOUND;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
} }
@ -146,6 +147,9 @@ private:
const auto pos = pos_col_const->getUInt(0); const auto pos = pos_col_const->getUInt(0);
if (pos < 8 * sizeof(ValueType)) if (pos < 8 * sizeof(ValueType))
mask = mask | (ValueType(1) << pos); mask = mask | (ValueType(1) << pos);
else
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND,
"The bit position argument {} is out of bounds for number", static_cast<UInt64>(pos));
} }
else else
{ {
@ -186,13 +190,20 @@ private:
for (const auto i : collections::range(0, mask.size())) for (const auto i : collections::range(0, mask.size()))
if (pos[i] < 8 * sizeof(ValueType)) if (pos[i] < 8 * sizeof(ValueType))
mask[i] = mask[i] | (ValueType(1) << pos[i]); mask[i] = mask[i] | (ValueType(1) << pos[i]);
else
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND,
"The bit position argument {} is out of bounds for number", static_cast<UInt64>(pos[i]));
return true; return true;
} }
else if (const auto pos_col_const = checkAndGetColumnConst<ColumnVector<PosType>>(pos_col_untyped)) else if (const auto pos_col_const = checkAndGetColumnConst<ColumnVector<PosType>>(pos_col_untyped))
{ {
const auto & pos = pos_col_const->template getValue<PosType>(); const auto & pos = pos_col_const->template getValue<PosType>();
const auto new_mask = pos < 8 * sizeof(ValueType) ? ValueType(1) << pos : 0; if (pos >= 8 * sizeof(ValueType))
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND,
"The bit position argument {} is out of bounds for number", static_cast<UInt64>(pos));
const auto new_mask = ValueType(1) << pos;
for (const auto i : collections::range(0, mask.size())) for (const auto i : collections::range(0, mask.size()))
mask[i] = mask[i] | new_mask; mask[i] = mask[i] | new_mask;

View File

@ -95,22 +95,21 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName
return res; return res;
} }
/// Validates the data type of a single function argument with the predicate `validator_func`.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if `arguments` has no element at `argument_index`,
/// and ILLEGAL_TYPE_OF_ARGUMENT if `validator_func` returns false for that argument's type.
/// `expected_type_description` is only used as text in the error message.
void validateArgumentType(const IFunction & func, const DataTypes & arguments,
size_t argument_index, bool (* validator_func)(const IDataType &),
const char * expected_type_description)
{
/// The requested index must refer to an argument that was actually provided.
if (arguments.size() <= argument_index)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}",
func.getName());
const auto & argument = arguments[argument_index];
/// Note: the message prints `argument_index` as passed in, without converting it to a 1-based position.
if (!validator_func(*argument))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}",
argument->getName(), std::to_string(argument_index), func.getName(), expected_type_description);
}
namespace namespace
{ {
/// Converts a zero-based argument index into a human-friendly English ordinal,
/// e.g. 0 -> "1st", 1 -> "2nd", 2 -> "3rd", 3 -> "4th", 20 -> "21st".
String withOrdinalEnding(size_t i)
{
    /// Error messages count arguments starting from one.
    const size_t position = i + 1;

    /// Positions ending in 11, 12 and 13 are irregular and always take "th" (11th, 112th, ...).
    const size_t last_two_digits = position % 100;
    const bool is_teen = last_two_digits >= 11 && last_two_digits <= 13;

    const char * suffix = "th";
    if (!is_teen)
    {
        switch (position % 10)
        {
            case 1: suffix = "st"; break;
            case 2: suffix = "nd"; break;
            case 3: suffix = "rd"; break;
            default: break;
        }
    }

    return std::to_string(position) + suffix;
}
void validateArgumentsImpl(const IFunction & func, void validateArgumentsImpl(const IFunction & func,
const ColumnsWithTypeAndName & arguments, const ColumnsWithTypeAndName & arguments,
size_t argument_offset, size_t argument_offset,
@ -120,20 +119,18 @@ void validateArgumentsImpl(const IFunction & func,
{ {
const auto argument_index = i + argument_offset; const auto argument_index = i + argument_offset;
if (argument_index >= arguments.size()) if (argument_index >= arguments.size())
{
break; break;
}
const auto & arg = arguments[i + argument_offset]; const auto & arg = arguments[i + argument_offset];
const auto & descriptor = descriptors[i]; const auto & descriptor = descriptors[i];
if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0) if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0)
throw Exception(error_code, throw Exception(error_code,
"Illegal type of argument #{}{} of function {}{}{}", "A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}",
argument_offset + i + 1, // +1 is for human-friendly 1-based indexing withOrdinalEnding(argument_offset + i),
(descriptor.argument_name ? " '" + std::string(descriptor.argument_name) + "'" : String{}), descriptor.name,
func.getName(), func.getName(),
(descriptor.expected_type_description ? String(", expected ") + descriptor.expected_type_description : String{}), descriptor.type_name,
(arg.type ? ", got " + arg.type->getName() : String{})); arg.type ? arg.type->getName() : "<?>");
} }
} }
@ -141,52 +138,42 @@ void validateArgumentsImpl(const IFunction & func,
int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const ColumnPtr & column) const int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const ColumnPtr & column) const
{ {
if (type_validator_func && (data_type == nullptr || !type_validator_func(*data_type))) if (name.empty() || type_name.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "name or type_name are not set");
if (type_validator && (data_type == nullptr || !type_validator(*data_type)))
return ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT; return ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT;
if (column_validator_func && (column == nullptr || !column_validator_func(*column))) if (column_validator && (column == nullptr || !column_validator(*column)))
return ErrorCodes::ILLEGAL_COLUMN; return ErrorCodes::ILLEGAL_COLUMN;
return 0; return 0;
} }
void validateFunctionArgumentTypes(const IFunction & func, void validateFunctionArguments(const IFunction & func,
const ColumnsWithTypeAndName & arguments, const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args, const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args) const FunctionArgumentDescriptors & optional_args)
{ {
if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size())
{ {
auto join_argument_types = [](const auto & args, const String sep = ", ") auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; };
{
String result;
for (const auto & a : args)
{
using A = std::decay_t<decltype(a)>;
if constexpr (std::is_same_v<A, FunctionArgumentDescriptor>)
{
if (a.argument_name)
result += "'" + std::string(a.argument_name) + "' : ";
if (a.expected_type_description)
result += a.expected_type_description;
}
else if constexpr (std::is_same_v<A, ColumnWithTypeAndName>)
result += a.type->getName();
result += sep; String expected_args_string;
} if (!mandatory_args.empty() && !optional_args.empty())
expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args));
if (!args.empty()) else if (!mandatory_args.empty() && optional_args.empty())
result.erase(result.end() - sep.length(), result.end()); expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments"
else if (mandatory_args.empty() && !optional_args.empty())
return result; expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args));
}; else
expected_args_string = "0 arguments";
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Incorrect number of arguments for function {} provided {}{}, expected {}{} ({}{})", "An incorrect number of arguments was specified for function '{}'. Expected {}, got {}",
func.getName(), arguments.size(), (!arguments.empty() ? " (" + join_argument_types(arguments) + ")" : String{}), func.getName(),
mandatory_args.size(), (!optional_args.empty() ? " to " + std::to_string(mandatory_args.size() + optional_args.size()) : ""), expected_args_string,
join_argument_types(mandatory_args), (!optional_args.empty() ? ", [" + join_argument_types(optional_args) + "]" : "")); fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments)));
} }
validateArgumentsImpl(func, arguments, 0, mandatory_args); validateArgumentsImpl(func, arguments, 0, mandatory_args);

View File

@ -115,77 +115,58 @@ ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col);
/// column if it is nullable. /// column if it is nullable.
ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns); ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns);
/// Checks argument type at specified index with predicate. /// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided
/// throws if there is no argument at specified index or if predicate returns false. /// arguments match the expected arguments.
void validateArgumentType(const IFunction & func, const DataTypes & arguments,
size_t argument_index, bool (* validator_func)(const IDataType &),
const char * expected_type_description);
/** Simple validator that is used in conjunction with validateFunctionArgumentTypes() to check if function arguments are as expected
*
* Also it is used to generate function description when arguments do not match expected ones.
* Any field can be null:
* `argument_name` - if not null, reported via type check errors.
* `expected_type_description` - if not null, reported via type check errors.
* `type_validator_func` - if not null, used to validate data type of function argument.
* `column_validator_func` - if not null, used to validate column of function argument.
*/
struct FunctionArgumentDescriptor struct FunctionArgumentDescriptor
{ {
const char * argument_name; /// The argument name, e.g. "longitude".
/// Should not be empty.
std::string_view name;
/// A function which validates the argument data type.
/// May be nullptr.
using TypeValidator = bool (*)(const IDataType &); using TypeValidator = bool (*)(const IDataType &);
TypeValidator type_validator_func; TypeValidator type_validator;
/// A function which validates the argument column.
/// May be nullptr.
using ColumnValidator = bool (*)(const IColumn &); using ColumnValidator = bool (*)(const IColumn &);
ColumnValidator column_validator_func; ColumnValidator column_validator;
const char * expected_type_description; /// The expected argument type, e.g. "const String" or "UInt64".
/// Should not be empty.
std::string_view type_name;
/** Validate argument type and column. /// Validate argument type and column.
*
* Returns non-zero error code if:
* Validator != nullptr && (Value == nullptr || Validator(*Value) == false)
* For:
* Validator is either `type_validator_func` or `column_validator_func`
* Value is either `data_type` or `column` respectively.
* ILLEGAL_TYPE_OF_ARGUMENT if type validation fails
*
*/
int isValid(const DataTypePtr & data_type, const ColumnPtr & column) const; int isValid(const DataTypePtr & data_type, const ColumnPtr & column) const;
}; };
using FunctionArgumentDescriptors = std::vector<FunctionArgumentDescriptor>; using FunctionArgumentDescriptors = std::vector<FunctionArgumentDescriptor>;
/** Validate that function arguments match specification. /// Validates that the user-provided arguments match the expected arguments.
* ///
* Designed to simplify argument validation for functions with variable arguments /// Checks that
* (e.g. depending on result type or other trait). /// - the number of provided arguments matches the number of mandatory/optional arguments,
* First, checks that number of arguments is as expected (including optional arguments). /// - all mandatory arguments are present and have the right type,
* Second, checks that mandatory args present and have valid type. /// - optional arguments - if present - have the right type.
* Third, checks optional arguments types, skipping ones that are missing. ///
* /// With multiple optional arguments, e.g. f([a, b, c]), provided arguments must match left-to-right. E.g. these calls are considered valid:
* Please note that if you have several optional arguments, like f([a, b, c]), /// f(a)
* only these calls are considered valid: /// f(a, b)
* f(a) /// f(a, b, c)
* f(a, b) /// but these are NOT:
* f(a, b, c) /// f(a, c)
* /// f(b, c)
* But NOT these: f(a, c), f(b, c) void validateFunctionArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments,
* In other words you can't omit middle optional arguments (just like in regular C++). const FunctionArgumentDescriptors & mandatory_args,
* const FunctionArgumentDescriptors & optional_args = {});
* If any mandatory arg is missing, throw an exception, with explicit description of expected arguments.
*/
void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args = {});
/// Checks if a list of array columns have equal offsets. Return a pair of nested columns and offsets if true, otherwise throw. /// Checks if a list of array columns have equal offsets. Return a pair of nested columns and offsets if true, otherwise throw.
std::pair<std::vector<const IColumn *>, const ColumnArray::Offset *> std::pair<std::vector<const IColumn *>, const ColumnArray::Offset *>
checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments); checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
/** Return ColumnNullable of src, with null map as OR-ed null maps of args columns. /// Return ColumnNullable of src, with null map as OR-ed null maps of args columns.
* Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. /// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
*/
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count); ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count);
struct NullPresence struct NullPresence

View File

@ -5,7 +5,7 @@
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h> #include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/HashJoin.h> #include <Interpreters/HashJoin/HashJoin.h>
#include <Storages/StorageJoin.h> #include <Storages/StorageJoin.h>
#include <Storages/TableLockHolder.h> #include <Storages/TableLockHolder.h>

View File

@ -40,7 +40,7 @@ public:
{"replacement", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"} {"replacement", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
}; };
validateFunctionArgumentTypes(*this, arguments, args); validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeString>(); return std::make_shared<DataTypeString>();
} }

View File

@ -194,7 +194,7 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings(
{"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"}, {"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"},
}; };
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); validateFunctionArguments(func, arguments, mandatory_args, optional_args);
} }
static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments) static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments)
@ -207,7 +207,7 @@ static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & fun
{"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"}, {"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"},
}; };
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); validateFunctionArguments(func, arguments, mandatory_args, optional_args);
} }
} }

View File

@ -47,7 +47,7 @@ public:
FunctionArgumentDescriptors args{ FunctionArgumentDescriptors args{
{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateTime64), nullptr, "DateTime64"} {"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isDateTime64), nullptr, "DateTime64"}
}; };
validateFunctionArgumentTypes(*this, arguments, args); validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeInt64>(); return std::make_shared<DataTypeInt64>();
} }

View File

@ -165,7 +165,7 @@ private:
}); });
} }
validateFunctionArgumentTypes(*this, arguments, validateFunctionArguments(*this, arguments,
FunctionArgumentDescriptors{ FunctionArgumentDescriptors{
{"mode", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "encryption mode string"}, {"mode", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "encryption mode string"},
{"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "plaintext"}, {"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "plaintext"},
@ -438,7 +438,7 @@ private:
}); });
} }
validateFunctionArgumentTypes(*this, arguments, validateFunctionArguments(*this, arguments,
FunctionArgumentDescriptors{ FunctionArgumentDescriptors{
{"mode", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "decryption mode string"}, {"mode", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "decryption mode string"},
{"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "ciphertext"}, {"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), {}, "ciphertext"},

View File

@ -2020,7 +2020,7 @@ public:
DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const
{ {
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}};
FunctionArgumentDescriptors optional_args; FunctionArgumentDescriptors optional_args;
if constexpr (to_decimal) if constexpr (to_decimal)
@ -2049,7 +2049,7 @@ public:
optional_args.push_back({"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}); optional_args.push_back({"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"});
} }
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
if constexpr (std::is_same_v<ToDataType, DataTypeInterval>) if constexpr (std::is_same_v<ToDataType, DataTypeInterval>)
{ {
@ -2390,7 +2390,7 @@ public:
if (isDateTime64<Name, ToDataType>(arguments)) if (isDateTime64<Name, ToDataType>(arguments))
{ {
validateFunctionArgumentTypes(*this, arguments, validateFunctionArguments(*this, arguments,
FunctionArgumentDescriptors{{"string", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}}, FunctionArgumentDescriptors{{"string", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}},
// optional // optional
FunctionArgumentDescriptors{ FunctionArgumentDescriptors{

View File

@ -518,66 +518,78 @@ struct Dispatcher
template <typename ScaleType> template <typename ScaleType>
static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr)
{ {
const auto & value_col_typed = checkAndGetColumn<ColumnVector<T>>(*value_col); // Non-const value argument:
auto col_res = ColumnVector<T>::create(); const auto * value_col_typed = checkAndGetColumn<ColumnVector<T>>(value_col);
if (value_col_typed)
typename ColumnVector<T>::Container & vec_res = col_res->getData();
vec_res.resize(value_col_typed.getData().size());
if (!vec_res.empty())
{ {
if (scale_col == nullptr || isColumnConst(*scale_col)) auto col_res = ColumnVector<T>::create();
{
auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
if (scale_arg == 0)
{
size_t scale = 1;
FunctionRoundingImpl<ScaleMode::Zero>::apply(value_col_typed.getData(), scale, vec_res);
}
else if (scale_arg > 0)
{
size_t scale = intExp10(scale_arg);
FunctionRoundingImpl<ScaleMode::Positive>::apply(value_col_typed.getData(), scale, vec_res);
}
else
{
size_t scale = intExp10(-scale_arg);
FunctionRoundingImpl<ScaleMode::Negative>::apply(value_col_typed.getData(), scale, vec_res);
}
}
/// Non-const scale argument:
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & value_data = value_col_typed.getData();
const auto & scale_data = scale_col_typed->getData();
const size_t rows = value_data.size();
for (size_t i = 0; i < rows; ++i) typename ColumnVector<T>::Container & vec_res = col_res->getData();
{ vec_res.resize(value_col_typed->getData().size());
Int64 scale64 = scale_data[i];
validateScale(scale64);
Scale raw_scale = scale64;
if (raw_scale == 0) if (!vec_res.empty())
{
// Const scale argument:
if (scale_col == nullptr || isColumnConst(*scale_col))
{
auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
if (scale_arg == 0)
{ {
size_t scale = 1; size_t scale = 1;
FunctionRoundingImpl<ScaleMode::Zero>::applyOne(value_data[i], scale, vec_res[i]); FunctionRoundingImpl<ScaleMode::Zero>::apply(value_col_typed->getData(), scale, vec_res);
} }
else if (raw_scale > 0) else if (scale_arg > 0)
{ {
size_t scale = intExp10(raw_scale); size_t scale = intExp10(scale_arg);
FunctionRoundingImpl<ScaleMode::Positive>::applyOne(value_data[i], scale, vec_res[i]); FunctionRoundingImpl<ScaleMode::Positive>::apply(value_col_typed->getData(), scale, vec_res);
} }
else else
{ {
size_t scale = intExp10(-raw_scale); size_t scale = intExp10(-scale_arg);
FunctionRoundingImpl<ScaleMode::Negative>::applyOne(value_data[i], scale, vec_res[i]); FunctionRoundingImpl<ScaleMode::Negative>::apply(value_col_typed->getData(), scale, vec_res);
}
}
/// Non-const scale argument:
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & value_data = value_col_typed->getData();
const auto & scale_data = scale_col_typed->getData();
const size_t rows = value_data.size();
for (size_t i = 0; i < rows; ++i)
{
Int64 scale64 = scale_data[i];
validateScale(scale64);
Scale raw_scale = scale64;
if (raw_scale == 0)
{
size_t scale = 1;
FunctionRoundingImpl<ScaleMode::Zero>::applyOne(value_data[i], scale, vec_res[i]);
}
else if (raw_scale > 0)
{
size_t scale = intExp10(raw_scale);
FunctionRoundingImpl<ScaleMode::Positive>::applyOne(value_data[i], scale, vec_res[i]);
}
else
{
size_t scale = intExp10(-raw_scale);
FunctionRoundingImpl<ScaleMode::Negative>::applyOne(value_data[i], scale, vec_res[i]);
}
} }
} }
} }
return col_res;
} }
// Const value argument:
return col_res; const auto * value_col_typed_const = checkAndGetColumnConst<ColumnVector<T>>(value_col);
if (value_col_typed_const)
{
auto value_col_full = value_col_typed_const->convertToFullColumn();
return apply<ScaleType>(value_col_full.get(), scale_col);
}
return nullptr;
} }
}; };
@ -589,38 +601,52 @@ public:
template <typename ScaleType> template <typename ScaleType>
static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr)
{ {
const auto & value_col_typed = checkAndGetColumn<ColumnDecimal<T>>(*value_col); // Non-const value argument:
const typename ColumnDecimal<T>::Container & vec_src = value_col_typed.getData(); const auto * value_col_typed = checkAndGetColumn<ColumnDecimal<T>>(value_col);
if (value_col_typed)
auto col_res = ColumnDecimal<T>::create(vec_src.size(), value_col_typed.getScale());
auto & vec_res = col_res->getData();
if (!vec_res.empty())
{ {
if (scale_col == nullptr || isColumnConst(*scale_col)) const typename ColumnDecimal<T>::Container & vec_src = value_col_typed->getData();
{
auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(value_col_typed.getData(), value_col_typed.getScale(), vec_res, scale_arg);
}
/// Non-const scale argument
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & scale = scale_col_typed->getData();
const size_t rows = vec_src.size();
for (size_t i = 0; i < rows; ++i) auto col_res = ColumnDecimal<T>::create(vec_src.size(), value_col_typed->getScale());
auto & vec_res = col_res->getData();
vec_res.resize(vec_src.size());
if (!vec_res.empty())
{
/// Const scale argument:
if (scale_col == nullptr || isColumnConst(*scale_col))
{ {
Int64 scale64 = scale[i]; auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
validateScale(scale64); DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(vec_src, value_col_typed->getScale(), vec_res, scale_arg);
Scale raw_scale = scale64; }
/// Non-const scale argument:
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & scale = scale_col_typed->getData();
const size_t rows = vec_src.size();
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::applyOne(value_col_typed.getElement(i), value_col_typed.getScale(), for (size_t i = 0; i < rows; ++i)
reinterpret_cast<ColumnDecimal<T>::NativeT&>(col_res->getElement(i)), raw_scale); {
Int64 scale64 = scale[i];
validateScale(scale64);
Scale raw_scale = scale64;
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(),
reinterpret_cast<ColumnDecimal<T>::NativeT&>(col_res->getElement(i)), raw_scale);
}
} }
} }
}
return col_res; return col_res;
}
// Const value argument:
const auto * value_col_typed_const = checkAndGetColumnConst<ColumnDecimal<T>>(value_col);
if (value_col_typed_const)
{
auto value_col_full = value_col_typed_const->convertToFullColumn();
return apply<ScaleType>(value_col_full.get(), scale_col);
}
return nullptr;
} }
}; };
@ -647,7 +673,7 @@ public:
FunctionArgumentDescriptors optional_args{ FunctionArgumentDescriptors optional_args{
{"N", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), nullptr, "The number of decimal places to round to"}, {"N", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), nullptr, "The number of decimal places to round to"},
}; };
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return arguments[0].type; return arguments[0].type;
} }
@ -671,9 +697,6 @@ public:
using ScaleTypes = std::decay_t<decltype(scaleTypes)>; using ScaleTypes = std::decay_t<decltype(scaleTypes)>;
using ScaleType = typename ScaleTypes::RightType; using ScaleType = typename ScaleTypes::RightType;
if (isColumnConst(*value_arg.column) && !isColumnConst(*scale_column.column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale column must be const for const data column");
res = Dispatcher<DataType, rounding_mode, tie_breaking_mode>::template apply<ScaleType>(value_arg.column.get(), scale_column.column.get()); res = Dispatcher<DataType, rounding_mode, tie_breaking_mode>::template apply<ScaleType>(value_arg.column.get(), scale_column.column.get());
return true; return true;
}; };

View File

@ -48,7 +48,7 @@ namespace
{"json", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}, {"json", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
}; };
validateFunctionArgumentTypes(*this, arguments, args); validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>()); return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>());
} }

View File

@ -32,7 +32,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}, {"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
}; };
validateFunctionArgumentTypes(func, arguments, mandatory_args); validateFunctionArguments(func, arguments, mandatory_args);
} }
static constexpr auto strings_argument_position = 0uz; static constexpr auto strings_argument_position = 0uz;

View File

@ -30,7 +30,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}, {"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
}; };
validateFunctionArgumentTypes(func, arguments, mandatory_args); validateFunctionArguments(func, arguments, mandatory_args);
} }
static constexpr auto strings_argument_position = 0uz; static constexpr auto strings_argument_position = 0uz;

View File

@ -30,7 +30,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}, {"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
}; };
validateFunctionArgumentTypes(func, arguments, mandatory_args); validateFunctionArguments(func, arguments, mandatory_args);
} }
static constexpr auto strings_argument_position = 0uz; static constexpr auto strings_argument_position = 0uz;

View File

@ -31,7 +31,7 @@ public:
{"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}, {"URL", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
}; };
validateFunctionArgumentTypes(func, arguments, mandatory_args); validateFunctionArguments(func, arguments, mandatory_args);
} }
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {} void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}

View File

@ -51,6 +51,8 @@ public:
bool isVariadic() const override { return impl.isVariadic(); } bool isVariadic() const override { return impl.isVariadic(); }
size_t getNumberOfArguments() const override { return impl.getNumberOfArguments(); } size_t getNumberOfArguments() const override { return impl.getNumberOfArguments(); }
bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); }
bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns(); }
bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants(); } bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants(); }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; }
@ -184,7 +186,7 @@ struct MapToNestedAdapter : public MapAdapterBase<MapToNestedAdapter<Name, retur
template <typename Name, size_t position> template <typename Name, size_t position>
struct MapToSubcolumnAdapter struct MapToSubcolumnAdapter
{ {
static_assert(position <= 1); static_assert(position <= 1, "position of Map subcolumn must be 0 or 1");
static void extractNestedTypes(DataTypes & types) static void extractNestedTypes(DataTypes & types)
{ {
@ -357,7 +359,7 @@ struct NameMapValues { static constexpr auto name = "mapValues"; };
using FunctionMapValues = FunctionMapToArrayAdapter<FunctionIdentity, MapToSubcolumnAdapter<NameMapValues, 1>, NameMapValues>; using FunctionMapValues = FunctionMapToArrayAdapter<FunctionIdentity, MapToSubcolumnAdapter<NameMapValues, 1>, NameMapValues>;
struct NameMapContains { static constexpr auto name = "mapContains"; }; struct NameMapContains { static constexpr auto name = "mapContains"; };
using FunctionMapContains = FunctionMapToArrayAdapter<FunctionArrayIndex<HasAction, NameMapContains>, MapToSubcolumnAdapter<NameMapKeys, 0>, NameMapContains>; using FunctionMapContains = FunctionMapToArrayAdapter<FunctionArrayIndex<HasAction, NameMapContains>, MapToSubcolumnAdapter<NameMapContains, 0>, NameMapContains>;
struct NameMapFilter { static constexpr auto name = "mapFilter"; }; struct NameMapFilter { static constexpr auto name = "mapFilter"; };
using FunctionMapFilter = FunctionMapToArrayAdapter<FunctionArrayFilter, MapToNestedAdapter<NameMapFilter>, NameMapFilter>; using FunctionMapFilter = FunctionMapToArrayAdapter<FunctionArrayFilter, MapToNestedAdapter<NameMapFilter>, NameMapFilter>;

View File

@ -87,7 +87,7 @@ public:
{"array_1", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"}, {"array_1", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"array_2", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"}, {"array_2", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
}; };
validateFunctionArgumentTypes(*this, arguments, args); validateFunctionArguments(*this, arguments, args);
return std::make_shared<DataTypeNumber<ResultType>>(); return std::make_shared<DataTypeNumber<ResultType>>();
} }

View File

@ -39,7 +39,7 @@ public:
{"array", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"}, {"array", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"samples", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isUInt), isColumnConst, "const UInt*"}, {"samples", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isUInt), isColumnConst, "const UInt*"},
}; };
validateFunctionArgumentTypes(*this, arguments, args); validateFunctionArguments(*this, arguments, args);
// Return an array with the same nested type as the input array // Return an array with the same nested type as the input array
const DataTypePtr & array_type = arguments[0].type; const DataTypePtr & array_type = arguments[0].type;

View File

@ -31,7 +31,7 @@ public:
{"array", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"}, {"array", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isArray), nullptr, "Array"},
{"length", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isInteger), nullptr, "Integer"} {"length", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isInteger), nullptr, "Integer"}
}; };
validateFunctionArgumentTypes(*this, arguments, args); validateFunctionArguments(*this, arguments, args);
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].type.get()); const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].type.get());
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(array_type->getNestedType())); return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(array_type->getNestedType()));

View File

@ -159,7 +159,7 @@ public:
{"separator", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"}, {"separator", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"},
}; };
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeString>(); return std::make_shared<DataTypeString>();
} }

View File

@ -8,6 +8,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
extern const int PARAMETER_OUT_OF_BOUND;
} }
namespace namespace
@ -21,12 +22,21 @@ struct BitTestImpl
static const constexpr bool allow_string_integer = false; static const constexpr bool allow_string_integer = false;
template <typename Result = ResultType> template <typename Result = ResultType>
NO_SANITIZE_UNDEFINED static Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) static Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{ {
if constexpr (is_big_int_v<A> || is_big_int_v<B>) if constexpr (is_big_int_v<A> || is_big_int_v<B>)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument");
else else
return (typename NumberTraits::ToInteger<A>::Type(a) >> typename NumberTraits::ToInteger<B>::Type(b)) & 1; {
typename NumberTraits::ToInteger<A>::Type a_int = a;
typename NumberTraits::ToInteger<B>::Type b_int = b;
const auto max_position = static_cast<decltype(b)>((8 * sizeof(a)) - 1);
if (b_int > max_position || b_int < 0)
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND,
"The bit position argument needs to a positive value and less or equal to {} for integer {}",
std::to_string(max_position), std::to_string(a_int));
return (a_int >> b_int) & 1;
}
} }
#if USE_EMBEDDED_COMPILER #if USE_EMBEDDED_COMPILER

View File

@ -203,7 +203,7 @@ private:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{ {
FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}};
FunctionArgumentDescriptors optional_args; FunctionArgumentDescriptors optional_args;
if (isDecimal(type) || isDateTime64(type)) if (isDecimal(type) || isDateTime64(type))
@ -212,9 +212,9 @@ private:
if (isDateTimeOrDateTime64(type)) if (isDateTimeOrDateTime64(type))
optional_args.push_back({"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"}); optional_args.push_back({"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"});
optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); optional_args.push_back({"default_value", nullptr, nullptr, "any type"});
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
size_t additional_argument_index = 1; size_t additional_argument_index = 1;

Some files were not shown because too many files have changed in this diff Show More