diff --git a/.gitattributes b/.gitattributes
index 56d6fecf4b8..dd94a48f8e7 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -2,3 +2,4 @@ contrib/* linguist-vendored
 *.h linguist-language=C++
 tests/queries/0_stateless/data_json/* binary
 tests/queries/0_stateless/*.reference -crlf
+src/Core/SettingsChangesHistory.cpp merge=union
diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp
index 9a2d02e3388..60231507c96 100644
--- a/base/base/itoa.cpp
+++ b/base/base/itoa.cpp
@@ -1,32 +1,3 @@
-// Based on https://github.com/amdn/itoa and combined with our optimizations
-//
-//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-//
-//
-// The MIT License (MIT)
-// Copyright (c) 2016 Arturo Martin-de-Nicolas
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included
-// in all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-//===----------------------------------------------------------------------===//
-
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
 #include <base/defines.h>
 #include <base/extended_types.h>
 #include <base/itoa.h>
@@ -34,99 +5,15 @@
 namespace
 {
-template <typename T>
-ALWAYS_INLINE inline constexpr T pow10(size_t x)
-{
-    return x ? 10 * pow10<T>(x - 1) : 1;
-}
-
-// Division by a power of 10 is implemented using a multiplicative inverse.
-// This strength reduction is also done by optimizing compilers, but
-// presently the fastest results are produced by using the values
-// for the multiplication and the shift as given by the algorithm
-// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
-//
-// http://www.agner.org/optimize/optimizing_assembly.pdf
-//
-// "Integer division by a constant (all processors)
-// A floating point number can be divided by a constant by multiplying
-// with the reciprocal. If we want to do the same with integers, we have
-// to scale the reciprocal by 2n and then shift the product to the right
-// by n. There are various algorithms for finding a suitable value of n
-// and compensating for rounding errors. The algorithm described below
-// was invented by Terje Mathisen, Norway, and not published elsewhere."
-
-/// Division by constant is performed by:
-/// 1. Adding 1 if needed;
-/// 2. Multiplying by another constant;
-/// 3. Shifting right by another constant.
-template <bool add_, typename UInt, UInt multiplier_, unsigned shift_>
-struct Division
-{
-    static constexpr bool add{add_};
-    static constexpr UInt multiplier{multiplier_};
-    static constexpr unsigned shift{shift_};
-};
-
-/// Select a type with appropriate number of bytes from the list of types.
-/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
-/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
-template <size_t N, typename T, typename... Ts>
-struct SelectType
-{
-    using Result = typename SelectType<N / 2, Ts...>::Result;
-};
-
-template <typename T, typename... Ts>
-struct SelectType<1, T, Ts...>
-{
-    using Result = T;
-};
-
-
-/// Division by 10^N where N is the size of the type.
-template <size_t N>
-using DivisionBy10PowN = typename SelectType<
-    N,
-    Division<false, uint8_t, 205U, 11>, /// divide by 10
-    Division<false, uint16_t, 41943U, 22>, /// divide by 100
-    Division<false, uint32_t, 3518437209U, 45>, /// divide by 10000
-    Division<true, uint64_t, 12379400392853802749ULL, 90> /// divide by 100000000
-    >::Result;
-
-template <size_t N>
-using UnsignedOfSize = typename SelectType<N, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>::Result;
-
-/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
-template <size_t N>
-struct QuotientAndRemainder
-{
-    UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
-    UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
-};
-
-template <size_t N>
-QuotientAndRemainder<N> inline split(UnsignedOfSize<N> value)
-{
-    constexpr DivisionBy10PowN<N> division;
-
-    UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
-    UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N>>(N));
-
-    return {quotient, remainder};
-}
-
-ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value)
+ALWAYS_INLINE inline char * outOneDigit(char * p, uint8_t value)
 {
     *p = '0' + value;
-    ++p;
-    return p;
+    return p + 1;
 }
 
 // Using a lookup table to convert binary numbers from 0 to 99
 // into ascii characters as described by Andrei Alexandrescu in
 // https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
-
 const char digits[201] = "00010203040506070809"
                          "10111213141516171819"
                          "20212223242526272829"
                          "30313233343536373839"
                          "40414243444546474849"
                          "50515253545556575859"
                          "60616263646566676869"
@@ -137,7 +24,6 @@ const char digits[201] = "00010203040506070809"
                          "70717273747576777879"
                          "80818283848586878889"
                          "90919293949596979899";
-
 ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
 {
     memcpy(p, &digits[value * 2], 2);
@@ -145,153 +31,260 @@ ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
     p += 2;
     return p;
 }
 
-namespace convert
+namespace jeaiii
 {
-template <typename UInt, size_t N = sizeof(UInt)>
-char * head(char * p, UInt u);
-template <typename UInt, size_t N = sizeof(UInt)>
-char * tail(char * p, UInt u);
+/*
+    MIT License
 
-//===----------------------------------------------------------===//
-// head: find most significant digit, skip leading zeros
-//===----------------------------------------------------------===//
+    Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa
 
-// "x" contains quotient and remainder after division by 10^N
-// quotient is less than 10^N
-template <size_t N>
-ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder<N> x)
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE.
+*/
+struct pair
 {
-    p = head(p, UnsignedOfSize<N / 2>(x.quotient));
-    p = tail(p, x.remainder);
-    return p;
-}
+    char dd[2];
+    constexpr pair(char c) : dd{c, '\0'} { } /// NOLINT(google-explicit-constructor)
+    constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } /// NOLINT(google-explicit-constructor)
+};
 
-// "u" is less than 10^2*N
-template <typename UInt, size_t N>
-ALWAYS_INLINE inline char * head(char * p, UInt u)
+constexpr struct
 {
-    return u < pow10<UnsignedOfSize<N>>(N) ? head(p, UnsignedOfSize<N / 2>(u)) : head(p, split<N>(u));
-}
+    pair dd[100]{
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  //
+        10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
+        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
+        30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
+        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
+        50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
+        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
+        70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
+        80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
+        90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
+    };
+    pair fd[100]{
+        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
+        10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
+        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
+        30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
+        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
+        50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
+        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
+        70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
+        80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
+        90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
+    };
+} digits;
 
-// recursion base case, selected when "u" is one byte
-template <>
-ALWAYS_INLINE inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
+constexpr UInt64 mask24 = (UInt64(1) << 24) - 1;
+constexpr UInt64 mask32 = (UInt64(1) << 32) - 1;
+constexpr UInt64 mask57 = (UInt64(1) << 57) - 1;
+
+template <bool, class, class F>
+struct _cond
 {
-    return u < 10 ? outDigit(p, u) : outTwoDigits(p, u);
-}
-
-//===----------------------------------------------------------===//
-// tail: produce all digits including leading zeros
-//===----------------------------------------------------------===//
-
-// recursive step, "u" is less than 10^2*N
-template <typename UInt, size_t N>
-ALWAYS_INLINE inline char * tail(char * p, UInt u)
+    using type = F;
+};
+template <class T, class F>
+struct _cond<true, T, F>
 {
-    QuotientAndRemainder<N> x = split<N>(u);
-    p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
-    p = tail(p, x.remainder);
-    return p;
-}
+    using type = T;
+};
+template <bool B, class T, class F>
+using cond = typename _cond<B, T, F>::type;
 
-// recursion base case, selected when "u" is one byte
-template <>
-ALWAYS_INLINE inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
+template <typename T>
+inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i)
 {
-    return outTwoDigits(p, u);
-}
+    constexpr auto q = sizeof(T);
+    using U = cond<q == 1, char8_t, cond<q <= sizeof(UInt16), UInt16, cond<q <= sizeof(UInt32), UInt32, UInt64>>>;
 
-//===----------------------------------------------------------===//
-// large values are >= 10^2*N
-// where x contains quotient and remainder after division by 10^N
-//===----------------------------------------------------------===//
-template <size_t N>
-ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder<N> x)
-{
-    QuotientAndRemainder<N> y = split<N>(x.quotient);
-    p = head(p, UnsignedOfSize<N / 2>(y.quotient));
-    p = tail(p, y.remainder);
-    p = tail(p, x.remainder);
-    return p;
-}
+    // convert bool to int before test with unary + to silence warning if T happens to be bool
+    U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i);
 
-//===----------------------------------------------------------===//
-// handle values of "u" that might be >= 10^2*N
-// where N is the size of "u" in bytes
-//===----------------------------------------------------------===//
-template <typename UInt, size_t N = sizeof(UInt)>
-ALWAYS_INLINE inline char * uitoa(char * p, UInt u)
-{
-    if (u < pow10<UnsignedOfSize<N>>(N))
-        return head(p, UnsignedOfSize<N / 2>(u));
-    QuotientAndRemainder<N> x = split<N>(u);
+    if (n < U(1e2))
+    {
+        /// This is changed from the original jeaiii implementation
+        /// For small numbers the extra branch to call outOneDigit() is worth it as it saves some instructions
+        /// and a memory access (no need to read digits.fd[n])
+        /// This is not true for pure random numbers, but that's not the common use case of a database
+        /// Original jeaiii code
+        // *reinterpret_cast<pair *>(b) = digits.fd[n];
+        // return n < 10 ? b + 1 : b + 2;
+        return n < 10 ? outOneDigit(b, n) : outTwoDigits(b, n);
+    }
+    if (n < UInt32(1e6))
+    {
+        if (sizeof(U) == 1 || n < U(1e4))
+        {
+            auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n;
+            *reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
+            if constexpr (sizeof(U) == 1)
+                b -= 1;
+            else
+                b -= n < U(1e3);
+            auto f2 = (f0 & mask24) * 100;
+            *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
+            return b + 4;
+        }
+        auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n;
+        *reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
+        if constexpr (sizeof(U) == 2)
+            b -= 1;
+        else
+            b -= n < U(1e5);
+        auto f2 = (f0 & mask32) * 100;
+        *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
+        auto f4 = (f2 & mask32) * 100;
+        *reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
+        return b + 6;
+    }
+    if (sizeof(U) == 4 || n < UInt64(1ull << 32ull))
+    {
+        if (n < U(1e8))
+        {
+            auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16;
+            *reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
+            b -= n < U(1e7);
+            auto f2 = (f0 & mask32) * 100;
+            *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
+            auto f4 = (f2 & mask32) * 100;
+            *reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
+            auto f6 = (f4 & mask32) * 100;
+            *reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
+            return b + 8;
+        }
+        auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * n;
+        *reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
+        b -= n < UInt32(1e9);
+        auto f2 = (f0 & mask57) * 100;
+        *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
+        auto f4 = (f2 & mask57) * 100;
+        *reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
+        auto f6 = (f4 & mask57) * 100;
+        *reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
+        auto f8 = (f6 & mask57) * 100;
+        *reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
+        return b + 10;
+    }
 
-    return u < pow10<UnsignedOfSize<N>>(2 * N) ? head(p, x) : large(p, x);
-}
+    // if we get here U must be UInt64 but some compilers don't know that, so reassign n to a UInt64 to avoid warnings
+    UInt32 z = n % UInt32(1e8);
+    UInt64 u = n / UInt32(1e8);
 
-// selected when "u" is one byte
-template <>
-ALWAYS_INLINE inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
-{
-    if (u < 10)
-        return outDigit(p, u);
-    else if (u < 100)
-        return outTwoDigits(p, u);
+    if (u < UInt32(1e2))
+    {
+        // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number)
+        *reinterpret_cast<pair *>(b) = digits.dd[u];
+        b += 2;
+    }
+    else if (u < UInt32(1e6))
+    {
+        if (u < UInt32(1e4))
+        {
+            auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
+            *reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
+            b -= u < UInt32(1e3);
+            auto f2 = (f0 & mask24) * 100;
+            *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
+            b += 4;
+        }
+        else
+        {
+            auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * u;
+            *reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
+            b -= u < UInt32(1e5);
+            auto f2 = (f0 & mask32) * 100;
+            *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
+            auto f4 = (f2 & mask32) * 100;
+            *reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
+            b += 6;
+        }
+    }
+    else if (u < UInt32(1e8))
+    {
+        auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16;
+        *reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
+        b -= u < UInt32(1e7);
+        auto f2 = (f0 & mask32) * 100;
+        *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
+        auto f4 = (f2 & mask32) * 100;
+        *reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
+        auto f6 = (f4 & mask32) * 100;
+        *reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
+        b += 8;
+    }
+    else if (u < UInt64(1ull << 32ull))
+    {
+        auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * u;
+        *reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
+        b -= u < UInt32(1e9);
+        auto f2 = (f0 & mask57) * 100;
+        *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
+        auto f4 = (f2 & mask57) * 100;
+        *reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
+        auto f6 = (f4 & mask57) * 100;
+        *reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
+        auto f8 = (f6 & mask57) * 100;
+        *reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
+        b += 10;
+    }
     else
     {
-        p = outDigit(p, u / 100);
-        p = outTwoDigits(p, u % 100);
-        return p;
+        UInt32 y = u % UInt32(1e8);
+        u /= UInt32(1e8);
+
+        // u is 2, 3, or 4 digits (if u < 10 it would have been handled above)
+        if (u < UInt32(1e2))
+        {
+            *reinterpret_cast<pair *>(b) = digits.dd[u];
+            b += 2;
+        }
+        else
+        {
+            auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
+            *reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
+            b -= u < UInt32(1e3);
+            auto f2 = (f0 & mask24) * 100;
+            *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
+            b += 4;
+        }
+        // do 8 digits
+        auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1;
+        *reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
+        auto f2 = (f0 & mask32) * 100;
+        *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
+        auto f4 = (f2 & mask32) * 100;
+        *reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
+        auto f6 = (f4 & mask32) * 100;
+        *reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
+        b += 8;
     }
-}
 
-//===----------------------------------------------------------===//
-// handle unsigned and signed integral operands
-//===----------------------------------------------------------===//
-
-// itoa: handle unsigned integral operands (selected by SFINAE)
-template <typename U>
-requires(!std::is_signed_v<U> && std::is_integral_v<U>)
-ALWAYS_INLINE inline char * itoa(U u, char * p)
-{
-    return convert::uitoa(p, u);
-}
-
-// itoa: handle signed integral operands (selected by SFINAE)
-template <typename I>
-requires(std::is_signed_v<I> && std::is_integral_v<I>)
-ALWAYS_INLINE inline char * itoa(I i, char * p)
-{
-    // Need "mask" to be filled with a copy of the sign bit.
-    // If "i" is a negative value, then the result of "operator >>"
-    // is implementation-defined, though usually it is an arithmetic
-    // right shift that replicates the sign bit.
-    // Use a conditional expression to be portable,
-    // a good optimizing compiler generates an arithmetic right shift
-    // and avoids the conditional branch.
-    UnsignedOfSize<sizeof(I)> mask = i < 0 ? ~UnsignedOfSize<sizeof(I)>(0) : 0;
-    // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<sizeof(I)>.
-    // Cannot use std::abs() because the result is undefined
-    // in 2's complement systems for the most-negative value.
-    // Want to avoid conditional branch for performance reasons since
-    // CPU branch prediction will be ineffective when negative values
-    // occur randomly.
-    // Let "u" be "i" cast to unsigned type UnsignedOfSize<sizeof(I)>.
-    // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
-    // This yields the absolute value with the desired type without
-    // using a conditional branch and without invoking undefined or
-    // implementation defined behavior:
-    UnsignedOfSize<sizeof(I)> u = ((2 * UnsignedOfSize<sizeof(I)>(i)) & ~mask) - UnsignedOfSize<sizeof(I)>(i);
-    // Unconditionally store a minus sign when producing digits
-    // in a forward direction and increment the pointer only if
-    // the value is in fact negative.
-    // This avoids a conditional branch and is safe because we will
-    // always produce at least one digit and it will overwrite the
-    // minus sign when the value is not negative.
-    *p = '-';
-    p += (mask & 1);
-    p = convert::uitoa(p, u);
-    return p;
+    // do 8 digits
+    auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1;
+    *reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
+    auto f2 = (f0 & mask32) * 100;
+    *reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
+    auto f4 = (f2 & mask32) * 100;
+    *reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
+    auto f6 = (f4 & mask32) * 100;
+    *reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
+    return b + 8;
 }
 }
@@ -303,7 +296,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
 {
     /// If the highest 64bit item is empty, we can print just the lowest item as u64
     if (_x.items[UInt128::_impl::little(1)] == 0)
-        return convert::itoa(_x.items[UInt128::_impl::little(0)], p);
+        return jeaiii::to_text_from_integer(p, _x.items[UInt128::_impl::little(0)]);
 
     /// Doing operations using __int128 is faster and we already rely on this feature
     using T = unsigned __int128;
@@ -334,7 +327,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
         current_block += max_multiple_of_hundred_blocks;
     }
 
-    char * highest_part_print = convert::itoa(uint64_t(x), p);
+    char * highest_part_print = jeaiii::to_text_from_integer(p, uint64_t(x));
     for (int i = 0; i < current_block; i++)
     {
         outTwoDigits(highest_part_print, two_values[current_block - 1 - i]);
@@ -450,12 +443,12 @@ ALWAYS_INLINE inline char * writeSIntText(T x, char * pos)
 
 char * itoa(UInt8 i, char * p)
 {
-    return convert::itoa(uint8_t(i), p);
+    return jeaiii::to_text_from_integer(p, uint8_t(i));
}
 
 char * itoa(Int8 i, char * p)
 {
-    return convert::itoa(int8_t(i), p);
+    return jeaiii::to_text_from_integer(p, int8_t(i));
 }
 
 char * itoa(UInt128 i, char * p)
@@ -481,7 +474,7 @@ char * itoa(Int256 i, char * p)
 #define DEFAULT_ITOA(T) \
     char * itoa(T i, char * p) \
     { \
-        return convert::itoa(i, p); \
+        return jeaiii::to_text_from_integer(p, i); \
    }
 
 #define FOR_MISSING_INTEGER_TYPES(M) \
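Review note: both the deleted `convert` implementation and the new `jeaiii` one lean on the same two tricks, division by a constant through a precomputed multiplicative inverse plus shift, and emitting two decimal digits per store from a "00".."99" lookup table. The standalone sketch below only illustrates that arithmetic; the helper names `div10000` and `writeTwoDigits` are ours, not part of the patch, and the (multiplier, shift) pair matches the uint32 entry of `DivisionBy10PowN` above:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// Division by 10^4 via multiplicative inverse: q = (m * v) >> 45 with m = ceil(2^45 / 10^4).
// Exact for all uint32 v because the rounding error (m * 10^4 - 2^45) * v stays below 2^45.
uint32_t div10000(uint32_t v)
{
    return uint32_t((3518437209ULL * v) >> 45);
}

// Two digits per memcpy through a lookup table, the same idea as outTwoDigits().
const char two_digits[201] = "00010203040506070809101112131415161718192021222324"
                             "25262728293031323334353637383940414243444546474849"
                             "50515253545556575859606162636465666768697071727374"
                             "75767778798081828384858687888990919293949596979899";

char * writeTwoDigits(char * p, uint8_t value)
{
    std::memcpy(p, &two_digits[value * 2], 2);
    return p + 2;
}

int main()
{
    // The multiplicative inverse agrees with plain division over the whole uint32 range.
    for (uint32_t v : {0u, 9999u, 10000u, 12345678u, 4294967295u})
        assert(div10000(v) == v / 10000);

    char buf[3] = {};
    writeTwoDigits(buf, 42);
    assert(std::memcmp(buf, "42", 2) == 0);
}
```

The same pattern, a fixed-point reciprocal scaled to a power of two followed by repeated multiply-by-100 steps, is what the `f0`/`f2`/`f4` chains in `to_text_from_integer` above are doing.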
diff --git a/base/poco/Foundation/include/Poco/Logger.h b/base/poco/Foundation/include/Poco/Logger.h
index 2a1cb33b407..74ddceea9dd 100644
--- a/base/poco/Foundation/include/Poco/Logger.h
+++ b/base/poco/Foundation/include/Poco/Logger.h
@@ -21,6 +21,8 @@
 #include <atomic>
 #include <cstddef>
 #include <map>
+#include <memory>
+#include <mutex>
 #include <vector>
 
 #include "Poco/Channel.h"
diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h
index 9068e56a93c..756e427c5f5 100644
--- a/base/poco/Foundation/include/Poco/Message.h
+++ b/base/poco/Foundation/include/Poco/Message.h
@@ -19,6 +19,7 @@
 #include <map>
+#include <memory>
 #include "Poco/Foundation.h"
 #include "Poco/Timestamp.h"
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 08f58335d16..90ae5981a21 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -228,6 +228,8 @@ add_contrib (ulid-c-cmake ulid-c)
 
 add_contrib (libssh-cmake libssh)
 
+add_contrib (prometheus-protobufs-cmake prometheus-protobufs prometheus-protobufs-gogo)
+
 # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs.
 # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
 # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,
diff --git a/contrib/azure b/contrib/azure
index 6262a76ef4c..92c94d7f37a 160000
--- a/contrib/azure
+++ b/contrib/azure
@@ -1 +1 @@
-Subproject commit 6262a76ef4c4c330c84e58dd4f6f13f4e6230fcd
+Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf
diff --git a/contrib/google-protobuf-cmake/protobuf_generate.cmake b/contrib/google-protobuf-cmake/protobuf_generate.cmake
index 3e30b4e40fd..0731a81aeb8 100644
--- a/contrib/google-protobuf-cmake/protobuf_generate.cmake
+++ b/contrib/google-protobuf-cmake/protobuf_generate.cmake
@@ -157,15 +157,13 @@ function(protobuf_generate)
   set(_generated_srcs_all)
   foreach(_proto ${protobuf_generate_PROTOS})
-    get_filename_component(_abs_file ${_proto} ABSOLUTE)
-    get_filename_component(_abs_dir ${_abs_file} DIRECTORY)
-    get_filename_component(_basename ${_proto} NAME_WE)
-    file(RELATIVE_PATH _rel_dir ${CMAKE_CURRENT_SOURCE_DIR} ${_abs_dir})
-
-    set(_possible_rel_dir)
-    if (NOT protobuf_generate_APPEND_PATH)
-      set(_possible_rel_dir ${_rel_dir}/)
-    endif()
+    # The protobuf compiler doesn't return paths to the files it generates so we have to calculate those paths here:
+    # _abs_file - absolute path to a .proto file,
+    # _possible_rel_dir - relative path to the .proto file from some import directory specified in Protobuf_IMPORT_DIRS,
+    # _basename - filename of the .proto file (without path and without extension).
+ get_proto_absolute_path(_abs_file "${_proto}" ${_protobuf_include_path}) + get_proto_relative_path(_possible_rel_dir "${_abs_file}" ${_protobuf_include_path}) + get_filename_component(_basename "${_abs_file}" NAME_WE) set(_generated_srcs) foreach(_ext ${protobuf_generate_GENERATE_EXTENSIONS}) @@ -173,7 +171,7 @@ function(protobuf_generate) endforeach() if(protobuf_generate_DESCRIPTORS AND protobuf_generate_LANGUAGE STREQUAL cpp) - set(_descriptor_file "${CMAKE_CURRENT_BINARY_DIR}/${_basename}.desc") + set(_descriptor_file "${protobuf_generate_PROTOC_OUT_DIR}/${_possible_rel_dir}${_basename}.desc") set(_dll_desc_out "--descriptor_set_out=${_descriptor_file}") list(APPEND _generated_srcs ${_descriptor_file}) endif() @@ -196,3 +194,36 @@ function(protobuf_generate) target_sources(${protobuf_generate_TARGET} PRIVATE ${_generated_srcs_all}) endif() endfunction() + +# Calculates the absolute path to a .proto file. +function(get_proto_absolute_path result proto) + cmake_path(IS_ABSOLUTE proto _is_abs_path) + if(_is_abs_path) + set(${result} "${proto}" PARENT_SCOPE) + return() + endif() + foreach(_include_dir ${ARGN}) + if(EXISTS "${_include_dir}/${proto}") + set(${result} "${_include_dir}/${proto}" PARENT_SCOPE) + return() + endif() + endforeach() + message(SEND_ERROR "Not found protobuf ${proto} in Protobuf_IMPORT_DIRS: ${ARGN}") +endfunction() + +# Calculates a relative path to a .proto file. The returned path is relative to one of include directories. +function(get_proto_relative_path result abs_path) + set(${result} "" PARENT_SCOPE) + get_filename_component(_abs_dir "${abs_path}" DIRECTORY) + foreach(_include_dir ${ARGN}) + cmake_path(IS_PREFIX _include_dir "${_abs_dir}" _is_prefix) + if(_is_prefix) + file(RELATIVE_PATH _rel_dir "${_include_dir}" "${_abs_dir}") + if(NOT _rel_dir STREQUAL "") + set(${result} "${_rel_dir}/" PARENT_SCOPE) + endif() + return() + endif() + endforeach() + message(WARNING "Not found protobuf ${abs_path} in Protobuf_IMPORT_DIRS: ${ARGN}") +endfunction() diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index a54bd8c1de2..0a650f2bcc0 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -5,7 +5,7 @@ else () endif () if (NOT ENABLE_ICU) - message(STATUS "Not using icu") + message(STATUS "Not using ICU") return() endif() diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 023fdcf103a..38ebcc8f680 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,7 +34,11 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. 
- set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") + else() + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + endif() else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() diff --git a/contrib/llvm-project b/contrib/llvm-project index d2142eed980..2a8967b60cb 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit d2142eed98046a47ff7112e3cc1e197c8a5cd80f +Subproject commit 2a8967b60cbe5bc2df253712bac343cc5263c5fc diff --git a/contrib/openssl b/contrib/openssl index 5d81fa7068f..ee2bb8513b2 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 5d81fa7068fc8c07f4d0997d5b703f3c541a637c +Subproject commit ee2bb8513b28bf86b35404dd17a0e29305ca9e08 diff --git a/contrib/prometheus-protobufs-cmake/CMakeLists.txt b/contrib/prometheus-protobufs-cmake/CMakeLists.txt new file mode 100644 index 00000000000..8c939902be7 --- /dev/null +++ b/contrib/prometheus-protobufs-cmake/CMakeLists.txt @@ -0,0 +1,34 @@ +option(ENABLE_PROMETHEUS_PROTOBUFS "Enable Prometheus Protobufs" ${ENABLE_PROTOBUF}) + +if(NOT ENABLE_PROMETHEUS_PROTOBUFS) + message(STATUS "Not using prometheus-protobufs") + return() +endif() + +set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src") +set(Prometheus_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs") +set(GogoProto_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs-gogo") + +# Protobuf_IMPORT_DIRS specify where the protobuf compiler will look for .proto files. +set(Old_Protobuf_IMPORT_DIRS ${Protobuf_IMPORT_DIRS}) +list(APPEND Protobuf_IMPORT_DIRS "${Protobuf_INCLUDE_DIR}" "${Prometheus_INCLUDE_DIR}" "${GogoProto_INCLUDE_DIR}") + +PROTOBUF_GENERATE_CPP(prometheus_protobufs_sources prometheus_protobufs_headers + "prompb/remote.proto" + "prompb/types.proto" + "gogoproto/gogo.proto" +) + +set(Protobuf_IMPORT_DIRS ${Old_Protobuf_IMPORT_DIRS}) + +# Ignore warnings while compiling protobuf-generated *.pb.h and *.pb.cpp files. +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") + +# Disable clang-tidy for protobuf-generated *.pb.h and *.pb.cpp files. +set (CMAKE_CXX_CLANG_TIDY "") + +add_library(_prometheus_protobufs ${prometheus_protobufs_sources} ${prometheus_protobufs_headers}) +target_include_directories(_prometheus_protobufs SYSTEM PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") +target_link_libraries (_prometheus_protobufs PUBLIC ch_contrib::protobuf) + +add_library (ch_contrib::prometheus_protobufs ALIAS _prometheus_protobufs) diff --git a/contrib/prometheus-protobufs-gogo/LICENSE b/contrib/prometheus-protobufs-gogo/LICENSE new file mode 100644 index 00000000000..16be18e5c50 --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/LICENSE @@ -0,0 +1,35 @@ +Copyright (c) 2022, The Cosmos SDK Authors. All rights reserved. +Copyright (c) 2013, The GoGo Authors. All rights reserved. + +Protocol Buffers for Go with Gadgets + +Go support for Protocol Buffers - Google's data interchange format + +Copyright 2010 The Go Authors. All rights reserved. 
+https://github.com/golang/protobuf + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/prometheus-protobufs-gogo/README b/contrib/prometheus-protobufs-gogo/README new file mode 100644 index 00000000000..c40bc42df66 --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/README @@ -0,0 +1,4 @@ +File "gogoproto/gogo.proto" was downloaded from the "Protocol Buffers for Go with Gadgets" project: +https://github.com/cosmos/gogoproto/blob/main/gogoproto/gogo.proto + +File "gogoproto/gogo.proto" is used in ClickHouse to compile prometheus protobufs. diff --git a/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto b/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto new file mode 100644 index 00000000000..974b36a7ccd --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto @@ -0,0 +1,145 @@ +// Protocol Buffers for Go with Gadgets +// +// Copyright (c) 2013, The GoGo Authors. All rights reserved. +// http://github.com/cosmos/gogoproto +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto2"; +package gogoproto; + +import "google/protobuf/descriptor.proto"; + +option java_package = "com.google.protobuf"; +option java_outer_classname = "GoGoProtos"; +option go_package = "github.com/cosmos/gogoproto/gogoproto"; + +extend google.protobuf.EnumOptions { + optional bool goproto_enum_prefix = 62001; + optional bool goproto_enum_stringer = 62021; + optional bool enum_stringer = 62022; + optional string enum_customname = 62023; + optional bool enumdecl = 62024; +} + +extend google.protobuf.EnumValueOptions { + optional string enumvalue_customname = 66001; +} + +extend google.protobuf.FileOptions { + optional bool goproto_getters_all = 63001; + optional bool goproto_enum_prefix_all = 63002; + optional bool goproto_stringer_all = 63003; + optional bool verbose_equal_all = 63004; + optional bool face_all = 63005; + optional bool gostring_all = 63006; + optional bool populate_all = 63007; + optional bool stringer_all = 63008; + optional bool onlyone_all = 63009; + + optional bool equal_all = 63013; + optional bool description_all = 63014; + optional bool testgen_all = 63015; + optional bool benchgen_all = 63016; + optional bool marshaler_all = 63017; + optional bool unmarshaler_all = 63018; + optional bool stable_marshaler_all = 63019; + + optional bool sizer_all = 63020; + + optional bool goproto_enum_stringer_all = 63021; + optional bool enum_stringer_all = 63022; + + optional bool unsafe_marshaler_all = 63023; + optional bool unsafe_unmarshaler_all = 63024; + + optional bool goproto_extensions_map_all = 63025; + optional bool goproto_unrecognized_all = 63026; + optional bool gogoproto_import = 63027; + optional bool protosizer_all = 63028; + optional bool compare_all = 63029; + optional bool typedecl_all = 63030; + optional bool enumdecl_all = 63031; + + optional bool goproto_registration = 63032; + optional bool messagename_all = 63033; + + optional bool goproto_sizecache_all = 63034; + optional bool goproto_unkeyed_all = 63035; +} + +extend google.protobuf.MessageOptions { + optional bool goproto_getters = 64001; + optional bool goproto_stringer = 64003; + optional bool verbose_equal = 64004; + optional bool face = 64005; + optional bool gostring = 64006; + optional bool populate = 64007; + optional bool stringer = 67008; + optional bool onlyone = 64009; + + optional bool equal = 64013; + optional bool description = 64014; + optional bool testgen = 64015; + optional bool benchgen = 64016; + optional bool marshaler = 64017; + optional bool unmarshaler = 64018; + optional bool stable_marshaler = 64019; + + optional bool sizer = 64020; + + optional bool unsafe_marshaler = 64023; + optional bool unsafe_unmarshaler = 64024; + + optional bool goproto_extensions_map = 64025; + optional bool goproto_unrecognized = 64026; + + optional bool protosizer = 64028; + optional bool compare = 64029; + + optional bool typedecl = 64030; + + optional bool messagename = 64033; + + optional bool goproto_sizecache = 64034; + optional bool goproto_unkeyed 
= 64035; +} + +extend google.protobuf.FieldOptions { + optional bool nullable = 65001; + optional bool embed = 65002; + optional string customtype = 65003; + optional string customname = 65004; + optional string jsontag = 65005; + optional string moretags = 65006; + optional string casttype = 65007; + optional string castkey = 65008; + optional string castvalue = 65009; + + optional bool stdtime = 65010; + optional bool stdduration = 65011; + optional bool wktpointer = 65012; + + optional string castrepeated = 65013; +} diff --git a/contrib/prometheus-protobufs/LICENSE b/contrib/prometheus-protobufs/LICENSE new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/contrib/prometheus-protobufs/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/prometheus-protobufs/README b/contrib/prometheus-protobufs/README new file mode 100644 index 00000000000..c557e59bb93 --- /dev/null +++ b/contrib/prometheus-protobufs/README @@ -0,0 +1,2 @@ +Files "prompb/remote.proto" and "prompb/types.proto" were downloaded from the Prometheus repository: +https://github.com/prometheus/prometheus/tree/main/prompb diff --git a/contrib/prometheus-protobufs/prompb/remote.proto b/contrib/prometheus-protobufs/prompb/remote.proto new file mode 100644 index 00000000000..50bb25e7fac --- /dev/null +++ b/contrib/prometheus-protobufs/prompb/remote.proto @@ -0,0 +1,88 @@ +// Copyright 2016 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package prometheus; + +option go_package = "prompb"; + +import "prompb/types.proto"; +import "gogoproto/gogo.proto"; + +message WriteRequest { + repeated prometheus.TimeSeries timeseries = 1 [(gogoproto.nullable) = false]; + // Cortex uses this field to determine the source of the write request. + // We reserve it to avoid any compatibility issues. + reserved 2; + repeated prometheus.MetricMetadata metadata = 3 [(gogoproto.nullable) = false]; +} + +// ReadRequest represents a remote read request. +message ReadRequest { + repeated Query queries = 1; + + enum ResponseType { + // Server will return a single ReadResponse message with matched series that includes list of raw samples. + // It's recommended to use streamed response types instead. + // + // Response headers: + // Content-Type: "application/x-protobuf" + // Content-Encoding: "snappy" + SAMPLES = 0; + // Server will stream a delimited ChunkedReadResponse message that + // contains XOR or HISTOGRAM(!) encoded chunks for a single series. + // Each message is following varint size and fixed size bigendian + // uint32 for CRC32 Castagnoli checksum. 
+    //
+    // Response headers:
+    // Content-Type: "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse"
+    // Content-Encoding: ""
+    STREAMED_XOR_CHUNKS = 1;
+  }
+
+  // accepted_response_types allows negotiating the content type of the response.
+  //
+  // Response types are taken from the list in the FIFO order. If no response type in `accepted_response_types` is
+  // implemented by server, error is returned.
+  // For request that do not contain `accepted_response_types` field the SAMPLES response type will be used.
+  repeated ResponseType accepted_response_types = 2;
+}
+
+// ReadResponse is a response when response_type equals SAMPLES.
+message ReadResponse {
+  // In same order as the request's queries.
+  repeated QueryResult results = 1;
+}
+
+message Query {
+  int64 start_timestamp_ms = 1;
+  int64 end_timestamp_ms = 2;
+  repeated prometheus.LabelMatcher matchers = 3;
+  prometheus.ReadHints hints = 4;
+}
+
+message QueryResult {
+  // Samples within a time series must be ordered by time.
+  repeated prometheus.TimeSeries timeseries = 1;
+}
+
+// ChunkedReadResponse is a response when response_type equals STREAMED_XOR_CHUNKS.
+// We strictly stream full series after series, optionally split by time. This means that a single frame can contain
+// partition of the single series, but once a new series is started to be streamed it means that no more chunks will
+// be sent for previous one. Series are returned sorted in the same way TSDB block are internally.
+message ChunkedReadResponse {
+  repeated prometheus.ChunkedSeries chunked_series = 1;
+
+  // query_index represents an index of the query from ReadRequest.queries these chunks relates to.
+  int64 query_index = 2;
+}
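Review note on the STREAMED_XOR_CHUNKS response type above: per the comment in ReadRequest, the stream is a sequence of frames, each carrying one ChunkedReadResponse message prefixed by a varint size and followed by a big-endian uint32 CRC32 (Castagnoli) of the payload. A minimal standalone sketch of a reader for that outer framing (the `readFrame`/`crc32c` helpers are ours for illustration, not part of this patch; snappy/protobuf decoding of the payload is out of scope):

```cpp
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string_view>

// CRC32 Castagnoli (reflected polynomial 0x82F63B78), computed bit by bit.
uint32_t crc32c(std::string_view data)
{
    uint32_t crc = 0xFFFFFFFFu;
    for (unsigned char byte : data)
    {
        crc ^= byte;
        for (int i = 0; i < 8; ++i)
            crc = (crc >> 1) ^ (0x82F63B78u & (0u - (crc & 1u)));
    }
    return ~crc;
}

// One frame is: <uvarint payload size> <payload bytes> <big-endian uint32 crc32c of payload>.
// On success returns the payload and advances `stream` past the frame.
std::optional<std::string_view> readFrame(std::string_view & stream)
{
    uint64_t size = 0;
    size_t pos = 0;
    for (int shift = 0;; shift += 7)
    {
        if (pos >= stream.size() || shift > 63)
            return std::nullopt; // truncated or malformed varint
        unsigned char byte = stream[pos++];
        size |= uint64_t(byte & 0x7F) << shift;
        if (!(byte & 0x80))
            break;
    }
    if (stream.size() - pos < 4 || stream.size() - pos - 4 < size)
        return std::nullopt; // truncated frame
    std::string_view payload = stream.substr(pos, size);
    const auto * c = reinterpret_cast<const unsigned char *>(stream.data() + pos + size);
    uint32_t expected = uint32_t(c[0]) << 24 | uint32_t(c[1]) << 16 | uint32_t(c[2]) << 8 | uint32_t(c[3]);
    if (crc32c(payload) != expected)
        return std::nullopt; // checksum mismatch
    stream.remove_prefix(pos + size_t(size) + 4);
    return payload;
}
```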
diff --git a/contrib/prometheus-protobufs/prompb/types.proto b/contrib/prometheus-protobufs/prompb/types.proto
new file mode 100644
index 00000000000..61fc1e0143e
--- /dev/null
+++ b/contrib/prometheus-protobufs/prompb/types.proto
@@ -0,0 +1,187 @@
+// Copyright 2017 Prometheus Team
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+package prometheus;
+
+option go_package = "prompb";
+
+import "gogoproto/gogo.proto";
+
+message MetricMetadata {
+  enum MetricType {
+    UNKNOWN = 0;
+    COUNTER = 1;
+    GAUGE = 2;
+    HISTOGRAM = 3;
+    GAUGEHISTOGRAM = 4;
+    SUMMARY = 5;
+    INFO = 6;
+    STATESET = 7;
+  }
+
+  // Represents the metric type, these match the set from Prometheus.
+  // Refer to github.com/prometheus/common/model/metadata.go for details.
+  MetricType type = 1;
+  string metric_family_name = 2;
+  string help = 4;
+  string unit = 5;
+}
+
+message Sample {
+  double value = 1;
+  // timestamp is in ms format, see model/timestamp/timestamp.go for
+  // conversion from time.Time to Prometheus timestamp.
+  int64 timestamp = 2;
+}
+
+message Exemplar {
+  // Optional, can be empty.
+  repeated Label labels = 1 [(gogoproto.nullable) = false];
+  double value = 2;
+  // timestamp is in ms format, see model/timestamp/timestamp.go for
+  // conversion from time.Time to Prometheus timestamp.
+  int64 timestamp = 3;
+}
+
+// A native histogram, also known as a sparse histogram.
+// Original design doc:
+// https://docs.google.com/document/d/1cLNv3aufPZb3fNfaJgdaRBZsInZKKIHo9E6HinJVbpM/edit
+// The appendix of this design doc also explains the concept of float
+// histograms. This Histogram message can represent both, the usual
+// integer histogram as well as a float histogram.
+message Histogram {
+  enum ResetHint {
+    UNKNOWN = 0; // Need to test for a counter reset explicitly.
+    YES = 1;     // This is the 1st histogram after a counter reset.
+    NO = 2;      // There was no counter reset between this and the previous Histogram.
+    GAUGE = 3;   // This is a gauge histogram where counter resets don't happen.
+  }
+
+  oneof count { // Count of observations in the histogram.
+    uint64 count_int = 1;
+    double count_float = 2;
+  }
+  double sum = 3; // Sum of observations in the histogram.
+  // The schema defines the bucket schema. Currently, valid numbers
+  // are -4 <= n <= 8. They are all for base-2 bucket schemas, where 1
+  // is a bucket boundary in each case, and then each power of two is
+  // divided into 2^n logarithmic buckets. Or in other words, each
+  // bucket boundary is the previous boundary times 2^(2^-n). In the
+  // future, more bucket schemas may be added using numbers < -4 or >
+  // 8.
+  sint32 schema = 4;
+  double zero_threshold = 5; // Breadth of the zero bucket.
+  oneof zero_count { // Count in zero bucket.
+    uint64 zero_count_int = 6;
+    double zero_count_float = 7;
+  }
+
+  // Negative Buckets.
+  repeated BucketSpan negative_spans = 8 [(gogoproto.nullable) = false];
+  // Use either "negative_deltas" or "negative_counts", the former for
+  // regular histograms with integer counts, the latter for float
+  // histograms.
+  repeated sint64 negative_deltas = 9; // Count delta of each bucket compared to previous one (or to zero for 1st bucket).
+  repeated double negative_counts = 10; // Absolute count of each bucket.
+
+  // Positive Buckets.
+  repeated BucketSpan positive_spans = 11 [(gogoproto.nullable) = false];
+  // Use either "positive_deltas" or "positive_counts", the former for
+  // regular histograms with integer counts, the latter for float
+  // histograms.
+  repeated sint64 positive_deltas = 12; // Count delta of each bucket compared to previous one (or to zero for 1st bucket).
+  repeated double positive_counts = 13; // Absolute count of each bucket.
+
+  ResetHint reset_hint = 14;
+  // timestamp is in ms format, see model/timestamp/timestamp.go for
+  // conversion from time.Time to Prometheus timestamp.
+  int64 timestamp = 15;
+}
+
+// A BucketSpan defines a number of consecutive buckets with their
+// offset. Logically, it would be more straightforward to include the
+// bucket counts in the Span. However, the protobuf representation is
+// more compact in the way the data is structured here (with all the
+// buckets in a single array separate from the Spans).
+message BucketSpan {
+  sint32 offset = 1; // Gap to previous span, or starting point for 1st span (which can be negative).
+  uint32 length = 2; // Length of consecutive buckets.
+}
+
+// TimeSeries represents samples and labels for a single time series.
+message TimeSeries {
+  // For a timeseries to be valid, and for the samples and exemplars
+  // to be ingested by the remote system properly, the labels field is required.
+  repeated Label labels = 1 [(gogoproto.nullable) = false];
+  repeated Sample samples = 2 [(gogoproto.nullable) = false];
+  repeated Exemplar exemplars = 3 [(gogoproto.nullable) = false];
+  repeated Histogram histograms = 4 [(gogoproto.nullable) = false];
+}
+
+message Label {
+  string name = 1;
+  string value = 2;
+}
+
+message Labels {
+  repeated Label labels = 1 [(gogoproto.nullable) = false];
+}
+
+// Matcher specifies a rule, which can match or set of labels or not.
+message LabelMatcher {
+  enum Type {
+    EQ = 0;
+    NEQ = 1;
+    RE = 2;
+    NRE = 3;
+  }
+  Type type = 1;
+  string name = 2;
+  string value = 3;
+}
+
+message ReadHints {
+  int64 step_ms = 1; // Query step size in milliseconds.
+  string func = 2; // String representation of surrounding function or aggregation.
+  int64 start_ms = 3; // Start time in milliseconds.
+  int64 end_ms = 4; // End time in milliseconds.
+  repeated string grouping = 5; // List of label names used in aggregation.
+  bool by = 6; // Indicate whether it is without or by.
+  int64 range_ms = 7; // Range vector selector range in milliseconds.
+}
+
+// Chunk represents a TSDB chunk.
+// Time range [min, max] is inclusive.
+message Chunk {
+  int64 min_time_ms = 1;
+  int64 max_time_ms = 2;
+
+  // We require this to match chunkenc.Encoding.
+  enum Encoding {
+    UNKNOWN = 0;
+    XOR = 1;
+    HISTOGRAM = 2;
+    FLOAT_HISTOGRAM = 3;
+  }
+  Encoding type = 3;
+  bytes data = 4;
+}
+
+// ChunkedSeries represents single, encoded time series.
+message ChunkedSeries {
+  // Labels should be sorted.
+  repeated Label labels = 1 [(gogoproto.nullable) = false];
+  // Chunks will be in start time order and may overlap.
+  repeated Chunk chunks = 2 [(gogoproto.nullable) = false];
+}
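To make the `schema` comment in the Histogram message above concrete: for schema n, consecutive bucket boundaries differ by a factor of 2^(2^-n), so the bucket with index i has the upper bound 2^(i * 2^-n). A small standalone sketch of that formula (ours, not part of the patch):

```cpp
#include <cmath>
#include <cstdio>

// For schema n (valid range -4..8), bucket index i has the upper bound 2^(i * 2^-n).
// Negative schemas give coarser buckets (factor 4, 16, ... per bucket).
double bucketUpperBound(int schema, int index)
{
    return std::pow(2.0, std::ldexp(double(index), -schema)); // ldexp(i, -n) == i * 2^-n
}

int main()
{
    // schema = 3: each power of two is split into 2^3 = 8 buckets,
    // so the growth factor per bucket is 2^(1/8), roughly 1.0905.
    for (int i = 1; i <= 8; ++i)
        std::printf("bucket %d: (%g, %g]\n", i, bucketUpperBound(3, i - 1), bucketUpperBound(3, i));
    // Bucket 8 ends exactly at 2.0: eight buckets cover one power of two.
}
```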
diff --git a/contrib/s2geometry b/contrib/s2geometry
index 0547c383717..6522a40338d 160000
--- a/contrib/s2geometry
+++ b/contrib/s2geometry
@@ -1 +1 @@
-Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b
+Subproject commit 6522a40338d58752c2a4227a3fc2bc4107c73e43
diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt
index 6632f9c27d5..48562b8cead 100644
--- a/contrib/s2geometry-cmake/CMakeLists.txt
+++ b/contrib/s2geometry-cmake/CMakeLists.txt
@@ -1,7 +1,7 @@
-option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES})
+option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES})
 
 if (NOT ENABLE_S2_GEOMETRY)
-    message(STATUS "Not using S2 geometry")
+    message(STATUS "Not using S2 Geometry")
     return()
 endif()
 
@@ -38,6 +38,7 @@ set(S2_SRCS
     "${S2_SOURCE_DIR}/s2/s2cell_index.cc"
     "${S2_SOURCE_DIR}/s2/s2cell_union.cc"
     "${S2_SOURCE_DIR}/s2/s2centroids.cc"
+    "${S2_SOURCE_DIR}/s2/s2chain_interpolation_query.cc"
     "${S2_SOURCE_DIR}/s2/s2closest_cell_query.cc"
     "${S2_SOURCE_DIR}/s2/s2closest_edge_query.cc"
     "${S2_SOURCE_DIR}/s2/s2closest_point_query.cc"
@@ -46,6 +47,7 @@ set(S2_SRCS
     "${S2_SOURCE_DIR}/s2/s2coords.cc"
     "${S2_SOURCE_DIR}/s2/s2crossing_edge_query.cc"
     "${S2_SOURCE_DIR}/s2/s2debug.cc"
+    "${S2_SOURCE_DIR}/s2/s2density_tree.cc"
     "${S2_SOURCE_DIR}/s2/s2earth.cc"
     "${S2_SOURCE_DIR}/s2/s2edge_clipping.cc"
     "${S2_SOURCE_DIR}/s2/s2edge_crosser.cc"
@@ -53,8 +55,10 @@ set(S2_SRCS
     "${S2_SOURCE_DIR}/s2/s2edge_distances.cc"
     "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc"
     "${S2_SOURCE_DIR}/s2/s2error.cc"
+    "${S2_SOURCE_DIR}/s2/s2fractal.cc"
     "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc"
     "${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc"
+    "${S2_SOURCE_DIR}/s2/s2index_cell_data.cc"
     "${S2_SOURCE_DIR}/s2/s2latlng.cc"
     "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc"
"${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc" @@ -63,10 +67,10 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2lax_polyline_shape.cc" "${S2_SOURCE_DIR}/s2/s2loop.cc" "${S2_SOURCE_DIR}/s2/s2loop_measures.cc" + "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2measures.cc" "${S2_SOURCE_DIR}/s2/s2memory_tracker.cc" "${S2_SOURCE_DIR}/s2/s2metrics.cc" - "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2padded_cell.cc" "${S2_SOURCE_DIR}/s2/s2point_compression.cc" @@ -80,10 +84,11 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2predicates.cc" "${S2_SOURCE_DIR}/s2/s2projections.cc" "${S2_SOURCE_DIR}/s2/s2r2rect.cc" - "${S2_SOURCE_DIR}/s2/s2region.cc" - "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" + "${S2_SOURCE_DIR}/s2/s2random.cc" "${S2_SOURCE_DIR}/s2/s2region_coverer.cc" "${S2_SOURCE_DIR}/s2/s2region_intersection.cc" + "${S2_SOURCE_DIR}/s2/s2region_sharder.cc" + "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" "${S2_SOURCE_DIR}/s2/s2region_union.cc" "${S2_SOURCE_DIR}/s2/s2shape_index.cc" "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc" @@ -94,9 +99,12 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_count_vertices.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_wrap.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc" + "${S2_SOURCE_DIR}/s2/s2testing.cc" "${S2_SOURCE_DIR}/s2/s2text_format.cc" "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc" "${S2_SOURCE_DIR}/s2/s2winding_operation.cc" @@ -140,6 +148,7 @@ target_link_libraries(_s2 PRIVATE absl::strings absl::type_traits absl::utility + absl::vlog_is_on ) target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") diff --git a/contrib/sysroot b/contrib/sysroot index 39c4713334f..cc385041b22 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 39c4713334f9f156dbf508f548d510d9129a657c +Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8 diff --git a/contrib/vectorscan b/contrib/vectorscan index 38431d11178..d29730e1cb9 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit 38431d111781843741a781a57a6381a527d900a4 +Subproject commit d29730e1cb9daaa66bda63426cdce83505d2c809 diff --git a/contrib/vectorscan-cmake/CMakeLists.txt b/contrib/vectorscan-cmake/CMakeLists.txt index d6c626c1612..35d5fd3dc82 100644 --- a/contrib/vectorscan-cmake/CMakeLists.txt +++ b/contrib/vectorscan-cmake/CMakeLists.txt @@ -1,11 +1,8 @@ -# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan. - +# Vectorscan is drop-in replacement for Hyperscan. 
if ((ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) OR ARCH_AARCH64) - option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES}) + option (ENABLE_VECTORSCAN "Enable vectorscan" ${ENABLE_LIBRARIES}) endif() -# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine - if (NOT ENABLE_VECTORSCAN) message (STATUS "Not using vectorscan") return() @@ -272,34 +269,24 @@ if (ARCH_AARCH64) ) endif() -# TODO -# if (ARCH_PPC64LE) -# list(APPEND SRCS -# "${LIBRARY_DIR}/src/util/supervector/arch/ppc64el/impl.cpp" -# ) -# endif() - add_library (_vectorscan ${SRCS}) -target_compile_options (_vectorscan PRIVATE - -fno-sanitize=undefined # assume the library takes care of itself - -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # options from original build system -) # library has too much debug information if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options (_vectorscan PRIVATE -g0) endif() -# Include version header manually generated by running the original build system -target_include_directories (_vectorscan SYSTEM PRIVATE common) +target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") + +# Makes the version header visible. It was generated by running the native build system manually. +# Please update whenever you update vectorscan. +target_include_directories (_vectorscan SYSTEM PUBLIC common) # vectorscan inherited some patched in-source versions of boost headers to fix a bug in # boost 1.69. This bug has been solved long ago but vectorscan's source code still # points to the patched versions, so include it here. target_include_directories (_vectorscan SYSTEM PRIVATE "${LIBRARY_DIR}/include") -target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") - # Include platform-specific config header generated by manually running the original build system # Please regenerate these files if you update vectorscan. diff --git a/contrib/vectorscan-cmake/common/hs_version.h b/contrib/vectorscan-cmake/common/hs_version.h index 8315b44fb2a..3d266484095 100644 --- a/contrib/vectorscan-cmake/common/hs_version.h +++ b/contrib/vectorscan-cmake/common/hs_version.h @@ -32,8 +32,12 @@ /** * A version string to identify this release of Hyperscan. 
*/ -#define HS_VERSION_STRING "5.4.7 2022-06-20" +#define HS_VERSION_STRING "5.4.11 2024-07-04" #define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (7 << 8) | 0) +#define HS_MAJOR 5 +#define HS_MINOR 4 +#define HS_PATCH 11 + #endif /* HS_VERSION_H_C6428FAF8E3713 */ diff --git a/docker/packager/binary-builder/build.sh b/docker/packager/binary-builder/build.sh index 032aceb0af3..bd5f2fe8466 100755 --- a/docker/packager/binary-builder/build.sh +++ b/docker/packager/binary-builder/build.sh @@ -111,6 +111,7 @@ fi mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output [ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output +[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds mv ./programs/*.dict ./programs/*.options ./programs/*_seed_corpus.zip /output ||: # libFuzzer oss-fuzz compatible infrastructure diff --git a/docker/packager/packager b/docker/packager/packager index 2dcbd8d695e..da4af7fc1be 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -276,10 +276,7 @@ def parse_env_variables( if is_release_build(debug_build, package_type, sanitizer, coverage): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") - if is_cross_arm: - cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") - else: - result.append("BUILD_MUSL_KEEPER=1") + cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") elif package_type == "fuzzers": cmake_flags.append("-DENABLE_FUZZING=1") cmake_flags.append("-DENABLE_PROTOBUF=1") diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 09a9f51084b..2215ac2b37c 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -213,6 +213,10 @@ function run_tests() ADDITIONAL_OPTIONS+=('--s3-storage') fi + if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--azure-blob-storage') + fi + if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--db-engine=Ordinary') fi diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3ce489b9e0e..5ece9743498 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -207,7 +207,7 @@ function run_tests() if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then # to disable the same tests - ADDITIONAL_OPTIONS+=('--s3-storage') + ADDITIONAL_OPTIONS+=('--azure-blob-storage') # azurite is slow, but with these two settings it can be super slow ADDITIONAL_OPTIONS+=('--no-random-settings') ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings') @@ -253,7 +253,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt diff --git a/docker/test/stress/run.sh 
b/docker/test/stress/run.sh index 6d121ba4142..323944591b1 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -110,6 +110,15 @@ start_server clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" +if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + TEMP_POLICY="s3_cache" +elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + TEMP_POLICY="azure_cache" +else + TEMP_POLICY="default" +fi + + clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, @@ -135,7 +144,7 @@ clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnabl URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, @@ -161,7 +170,7 @@ clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable U URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, @@ -195,7 +204,7 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) - SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + SAMPLE BY intHash32(UserID) SETTINGS 
index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" @@ -211,19 +220,29 @@ clickhouse-client --query "SYSTEM STOP THREAD FUZZER" stop_server # Let's enable S3 storage by default -export USE_S3_STORAGE_FOR_MERGE_TREE=1 export RANDOMIZE_OBJECT_KEY_TYPE=1 export ZOOKEEPER_FAULT_INJECTION=1 export THREAD_POOL_FAULT_INJECTION=1 configure -# But we still need default disk because some tables loaded only into it -sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ - | sed "s|
<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
-    > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
-mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
-sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
-sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then
+    # But we still need default disk because some tables loaded only into it
+    sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
+    | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
+    > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
+    mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+    sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+    sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then
+    # But we still need default disk because some tables loaded only into it
+    sudo cat /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml \
+    | sed "s|<main><disk>azure</disk></main>|<main><disk>azure</disk></main><default><disk>default</disk></default>|" \
+    > /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp
+    mv /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml
+    sudo chown clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml
+    sudo chgrp clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml
+fi
+
 sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \
     | sed "s|<level>trace</level>|<level>test</level>|" \
diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index f0c4e1b0e34..3826e4e9c94 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -993,11 +993,11 @@ They can be used for prewhere optimization only if we enable `set allow_statisti

 - `TDigest`

-    Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.
+    [TDigest](https://github.com/tdunning/t-digest) sketches which allow computing approximate percentiles (e.g. the 90th percentile) for numeric columns.

 - `Uniq`
-
-    Estimate the number of distinct values of a column by HyperLogLog.
+
+    [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimate of how many distinct values a column contains.

 ## Column-level Settings {#column-level-settings}
diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md
index 46c24ad8491..fc861e25e9f 100644
--- a/docs/en/operations/backup.md
+++ b/docs/en/operations/backup.md
@@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
     - [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
     - `password` for the file on disk
     - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
+    - `use_same_s3_credentials_for_base_backup`: whether the base backup to S3 should inherit credentials from the query. Only works with `S3`.
     - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
     - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
     - `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 3d6d776f4da..1d74a63b972 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2536,7 +2536,7 @@ Possible values:
 - 0 — Optimization disabled.
 - 1 — Optimization enabled.

-Default value: `0`.
+Default value: `1`.
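The statistics types documented in the `mergetree.md` hunk above are easiest to see in DDL. The following is a minimal, hedged sketch: the table and column names are hypothetical, and it assumes the experimental feature flags of this release (`allow_experimental_statistics` for declaring statistics, plus the `allow_statistics_optimize` setting that the docs mention for prewhere optimization):

``` sql
SET allow_experimental_statistics = 1; -- assumed flag to declare column statistics
SET allow_statistics_optimize = 1;     -- per the prewhere note in the docs above

-- Hypothetical table carrying both sketch types:
CREATE TABLE hits_sketch
(
    user_id UInt64 STATISTICS(Uniq),         -- HyperLogLog: distinct-value estimate
    duration_ms Float64 STATISTICS(TDigest)  -- TDigest: approximate percentiles
)
ENGINE = MergeTree
ORDER BY user_id;

-- The sketches let the optimizer order PREWHERE conditions by estimated selectivity:
SELECT count() FROM hits_sketch WHERE duration_ms < 100 AND user_id = 42;
```

If the flags differ in your build, the documented `ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq` form from the statistics.md hunk below is the reference point for the syntax.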
## optimize_trivial_count_query {#optimize-trivial-count-query}
diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md
index 80024781f88..6880cef0e5c 100644
--- a/docs/en/sql-reference/statements/alter/statistics.md
+++ b/docs/en/sql-reference/statements/alter/statistics.md
@@ -28,6 +28,6 @@ There is an example adding two statistics types to two columns:
 ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq;
 ```
-:::note
+:::note
 Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
 :::
diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md
index 0253bc647e6..b866d0b9f5f 100644
--- a/docs/en/sql-reference/statements/create/table.md
+++ b/docs/en/sql-reference/statements/create/table.md
@@ -152,7 +152,7 @@ SELECT * FROM test;

 `MATERIALIZED expr`

-Materialized expression. Values of such columns are always calculated, they cannot be specified in INSERT queries.
+Materialized expression. Values of such columns are automatically calculated according to the specified materialized expression when rows are inserted. Values cannot be explicitly specified during `INSERT`s.

 Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`.
diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md
new file mode 100644
index 00000000000..e15f8a40156
--- /dev/null
+++ b/docs/en/sql-reference/table-functions/fuzzQuery.md
@@ -0,0 +1,36 @@
+---
+slug: /en/sql-reference/table-functions/fuzzQuery
+sidebar_position: 75
+sidebar_label: fuzzQuery
+---
+
+# fuzzQuery
+
+Perturbs the given query string with random variations.
+
+``` sql
+fuzzQuery(query[, max_query_length[, random_seed]])
+```
+
+**Arguments**
+
+- `query` (String) - The source query to perform the fuzzing on.
+- `max_query_length` (UInt64) - The maximum length the query can reach during the fuzzing process.
+- `random_seed` (UInt64) - A random seed for producing stable results.
+
+**Returned Value**
+
+A table object with a single column containing perturbed query strings.
+
+## Usage Example
+
+``` sql
+SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2;
+```
+
+```
+   ┌─query──────────────────────────────────────────────────────────┐
+1. │ SELECT 'a' AS key GROUP BY key │
+2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │
+   └────────────────────────────────────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md
index 1a7e2b8d66a..35e5d86034c 100644
--- a/docs/en/sql-reference/table-functions/s3.md
+++ b/docs/en/sql-reference/table-functions/s3.md
@@ -269,9 +269,9 @@ FROM s3(

 ## Virtual Columns {#virtual-columns}

-- `_path` — Path to the file. Type: `LowCardinalty(String)`.
-- `_file` — Name of the file. Type: `LowCardinalty(String)`.
-- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
+- `_path` — Path to the file. Type: `LowCardinality(String)`. For archives, shows the path in the format "{path_to_archive}::{path_to_file_inside_archive}".
+- `_file` — Name of the file. Type: `LowCardinality(String)`. For archives, shows the name of the file inside the archive.
+- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. For archives, shows the uncompressed size of the file inside the archive.
 - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.

 ## Storage Settings {#storage-settings}
diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 3a8afd10359..16225d4b0e2 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -23,6 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
 | `GROUPS` frame | ❌ |
 | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) |
 | `rank()`, `dense_rank()`, `row_number()` | ✅ |
+| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`|
 | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | ntile(buckets) | ✅
Specify the window as (partition by x order by y rows between unbounded preceding and unbounded following). |
@@ -80,8 +81,8 @@ These functions can be used only as a window function.
 - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
 - `rank()` - Rank the current row within its partition with gaps.
 - `dense_rank()` - Rank the current row within its partition without gaps.
-- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
-- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.
+- `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the previous row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned.
+- `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the specified default value is used.

 ## Examples
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 3a70a0bac12..84bbf6c83d3 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2077,7 +2077,7 @@ SELECT * FROM test_table
 - 0 — оптимизация отключена.
 - 1 — оптимизация включена.

-Значение по умолчанию: `0`.
+Значение по умолчанию: `1`.
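A minimal sketch of the extended `lagInFrame`/`leadInFrame` signatures and `percent_rank()` documented in the window-functions hunks above. The data comes from the `values` table function, the alias names are made up, and the explicit full frame mirrors the `rows between unbounded preceding and unbounded following` advice from the `lag/lead` workaround row:

``` sql
SELECT
    value,
    lagInFrame(value, 1, toInt32(-1))  OVER w AS prev_value, -- default -1 returned at the frame start
    leadInFrame(value, 1, toInt32(-1)) OVER w AS next_value, -- default -1 returned at the frame end
    percent_rank() OVER w AS pct                             -- relative standing within the partition
FROM values('value Int32', 10, 20, 30, 40)
WINDOW w AS (ORDER BY value ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
ORDER BY value;
```

The explicit `toInt32(-1)` keeps the default argument's type aligned with the column, which is the conservative choice; depending on version, implicit conversion may also be accepted.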
## optimize_trivial_count_query {#optimize-trivial-count-query} diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 4640882f2be..6b3a0b16624 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -66,18 +66,18 @@ else() message(STATUS "Library bridge mode: OFF") endif() -if (ENABLE_CLICKHOUSE_KEEPER) - message(STATUS "ClickHouse keeper mode: ON") -else() - message(STATUS "ClickHouse keeper mode: OFF") -endif() - if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) message(STATUS "ClickHouse keeper-converter mode: ON") else() message(STATUS "ClickHouse keeper-converter mode: OFF") endif() +if (ENABLE_CLICKHOUSE_KEEPER) + message(STATUS "ClickHouse Keeper: ON") +else() + message(STATUS "ClickHouse Keeper: OFF") +endif() + if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) message(STATUS "ClickHouse keeper-client mode: ON") else() @@ -131,10 +131,6 @@ add_subdirectory (static-files-disk-uploader) add_subdirectory (su) add_subdirectory (disks) -if (ENABLE_CLICKHOUSE_KEEPER) - add_subdirectory (keeper) -endif() - if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) add_subdirectory (keeper-converter) endif() @@ -143,6 +139,10 @@ if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) add_subdirectory (keeper-client) endif() +if (ENABLE_CLICKHOUSE_KEEPER) + add_subdirectory (keeper) +endif() + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) add_subdirectory (odbc-bridge) endif () diff --git a/programs/client/Client.h b/programs/client/Client.h index 229608f787d..6d57a6ea648 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -9,7 +9,10 @@ namespace DB class Client : public ClientBase { public: - Client() = default; + Client() + { + fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr); + } void initialize(Poco::Util::Application & self) override; diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 079951be55e..9b931c49c24 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -1,4 +1,5 @@ set(CLICKHOUSE_KEEPER_SOURCES + keeper_main.cpp Keeper.cpp ) @@ -8,6 +9,9 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_common_io clickhouse_common_zookeeper daemon + clickhouse-keeper-converter-lib + clickhouse-keeper-client-lib + clickhouse_functions dbms ) @@ -17,199 +21,11 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources - set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperReconfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/RaftServerConfig.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/FourLetterCommand.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/InMemoryLogStore.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConnectionStats.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperDispatcher.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperFeatureFlags.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConstants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperAsynchronousMetrics.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperCommon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp + clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_SOURCES}) - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/Field.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsEnums.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerUUID.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/UUID.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BackgroundSchedulePool.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPPathHints.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CertificateReloader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CloudPlacementInfo.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerRequest.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerResponse.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnectionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CachedCompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CheckingCompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferBase.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperCommon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperConstants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperImpl.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperIO.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperLock.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperNodeCache.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/registerDisks.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskSelector.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocal.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocalCheckThread.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/LocalDirectorySyncGuard.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/TemporaryFileOnDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/loadLocalDiskConfig.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskType.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/IObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataOperationsHolder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getIOUringReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getThreadPoolReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp - ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Context.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Settings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/ThreadStatusExt.cpp - - Keeper.cpp - clickhouse-keeper.cpp - ) - - # List of resources for clickhouse-keeper client - if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) - list(APPEND CLICKHOUSE_KEEPER_STANDALONE_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/KeeperClient.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Commands.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Parser.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/LineReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/ReplxxLineReader.cpp - ) - endif() - - clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES}) - - # Remove some redundant dependencies - target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_KEEPER_STANDALONE_BUILD) - target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG) - - if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND TARGET ch_rust::skim) - target_link_libraries(clickhouse-keeper PRIVATE ch_rust::skim) - endif() - - target_link_libraries(clickhouse-keeper - PRIVATE - ch_contrib::abseil_swiss_tables - ch_contrib::nuraft - ch_contrib::lz4 - ch_contrib::zstd - ch_contrib::cityhash - ch_contrib::jemalloc - common ch_contrib::double_conversion - ch_contrib::dragonbox_to_chars - pcg_random - ch_contrib::pdqsort - ch_contrib::miniselect - clickhouse_common_config_no_zookeeper_log - loggers_no_text_log - clickhouse_common_io - clickhouse_parsers # Otherwise compression will not built. FIXME. 
- ) + target_link_libraries(clickhouse-keeper PUBLIC ${CLICKHOUSE_KEEPER_LINK}) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - if (SPLIT_DEBUG_SYMBOLS) clickhouse_split_debug_symbols(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-keeper) else() diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index f14ef2e5552..967920557e1 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -27,6 +27,8 @@ #include #include +#include + #include #include @@ -75,16 +77,6 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } -#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD - -// Weak symbols don't work correctly on Darwin -// so we have a stub implementation to avoid linker errors -void collectCrashLog( - Int32, UInt64, const String &, const StackTrace &) -{} - -#endif - namespace DB { @@ -277,6 +269,9 @@ HTTPContextPtr httpContext() int Keeper::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif Poco::Logger * log = &logger(); UseSSL use_ssl; diff --git a/programs/keeper/clickhouse-keeper.cpp b/programs/keeper/clickhouse-keeper.cpp deleted file mode 100644 index f2f91930ac0..00000000000 --- a/programs/keeper/clickhouse-keeper.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include "config_tools.h" - - -int mainEntryClickHouseKeeper(int argc, char ** argv); - -#if ENABLE_CLICKHOUSE_KEEPER_CLIENT -int mainEntryClickHouseKeeperClient(int argc, char ** argv); -#endif - -int main(int argc_, char ** argv_) -{ -#if ENABLE_CLICKHOUSE_KEEPER_CLIENT - - if (argc_ >= 2) - { - /// 'clickhouse-keeper --client ...' and 'clickhouse-keeper client ...' are OK - if (strcmp(argv_[1], "--client") == 0 || strcmp(argv_[1], "client") == 0) - { - argv_[1] = argv_[0]; - return mainEntryClickHouseKeeperClient(--argc_, argv_ + 1); - } - } - - if (argc_ > 0 && (strcmp(argv_[0], "clickhouse-keeper-client") == 0 || endsWith(argv_[0], "/clickhouse-keeper-client"))) - return mainEntryClickHouseKeeperClient(argc_, argv_); -#endif - - return mainEntryClickHouseKeeper(argc_, argv_); -} diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp new file mode 100644 index 00000000000..a240f9699f2 --- /dev/null +++ b/programs/keeper/keeper_main.cpp @@ -0,0 +1,189 @@ +#include +#include + +#include +#include +#include +#include +#include /// pair + +#include + +#include "config.h" +#include "config_tools.h" + +#include +#include + +#include +#include +#include + +#include +#include + + +int mainEntryClickHouseKeeper(int argc, char ** argv); +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER +int mainEntryClickHouseKeeperConverter(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT +int mainEntryClickHouseKeeperClient(int argc, char ** argv); +#endif + +namespace +{ + +using MainFunc = int (*)(int, char**); + +/// Add an item here to register new application +std::pair clickhouse_applications[] = +{ + // keeper + {"keeper", mainEntryClickHouseKeeper}, +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER + {"converter", mainEntryClickHouseKeeperConverter}, + {"keeper-converter", mainEntryClickHouseKeeperConverter}, +#endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT + {"client", mainEntryClickHouseKeeperClient}, + {"keeper-client", mainEntryClickHouseKeeperClient}, +#endif + +}; + +int printHelp(int, char **) +{ + std::cerr << "Use one of the following commands:" << std::endl; + for (auto & application : clickhouse_applications) + 
std::cerr << "clickhouse " << application.first << " [args] " << std::endl; + return -1; +} + +} + + +bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) +{ + /// Use app if the first arg 'app' is passed (the arg should be quietly removed) + if (argv.size() >= 2) + { + auto first_arg = argv.begin() + 1; + + /// 'clickhouse --client ...' and 'clickhouse client ...' are Ok + if (*first_arg == app_suffix + || (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix)) + { + argv.erase(first_arg); + return true; + } + } + + /// keeper suffix is default which will be used if no other app is detected + if (app_suffix == "keeper") + return false; + + /// Use app if clickhouse binary is run through symbolic link with name clickhouse-app + std::string app_name = "clickhouse-" + std::string(app_suffix); + return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name)); +} + +/// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure. +/// We don't use it. But it can be used by some libraries for implementation of "plugins". +/// We absolutely discourage the ancient technique of loading +/// 3rd-party uncontrolled dangerous libraries into the process address space, +/// because it is insane. + +#if !defined(USE_MUSL) +extern "C" +{ + void * dlopen(const char *, int) + { + return nullptr; + } + + void * dlmopen(long, const char *, int) // NOLINT + { + return nullptr; + } + + int dlclose(void *) + { + return 0; + } + + const char * dlerror() + { + return "ClickHouse does not allow dynamic library loading"; + } +} +#endif + +/// Prevent messages from JeMalloc in the release build. +/// Some of these messages are non-actionable for the users, such as: +/// : Number of CPUs detected is not deterministic. Per-CPU arena disabled. +#if USE_JEMALLOC && defined(NDEBUG) && !defined(SANITIZER) +extern "C" void (*malloc_message)(void *, const char *s); +__attribute__((constructor(0))) void init_je_malloc_message() { malloc_message = [](void *, const char *){}; } +#endif + +/// This allows to implement assert to forbid initialization of a class in static constructors. +/// Usage: +/// +/// extern bool inside_main; +/// class C { C() { assert(inside_main); } }; +bool inside_main = false; + +int main(int argc_, char ** argv_) +{ + inside_main = true; + SCOPE_EXIT({ inside_main = false; }); + + /// PHDR cache is required for query profiler to work reliably + /// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen) + /// will work only after additional call of this function. + /// Note: we forbid dlopen in our code. + updatePHDRCache(); + +#if !defined(USE_MUSL) + checkHarmfulEnvironmentVariables(argv_); +#endif + + /// This is used for testing. For example, + /// clickhouse-local should be able to run a simple query without throw/catch. + if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe) + DB::terminate_on_any_exception = true; + + /// Reset new handler to default (that throws std::bad_alloc) + /// It is needed because LLVM library clobbers it. 
+ std::set_new_handler(nullptr); + + std::vector argv(argv_, argv_ + argc_); + + /// Print a basic help if nothing was matched + MainFunc main_func = mainEntryClickHouseKeeper; + + if (isClickhouseApp("help", argv)) + { + main_func = printHelp; + } + else + { + for (auto & application : clickhouse_applications) + { + if (isClickhouseApp(application.first, argv)) + { + main_func = application.second; + break; + } + } + } + + int exit_code = main_func(static_cast(argv.size()), argv.data()); + +#if defined(SANITIZE_COVERAGE) + dumpCoverage(); +#endif + + return exit_code; +} diff --git a/programs/main.cpp b/programs/main.cpp index 61e2bc18ed7..02ea1471108 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -1,5 +1,3 @@ -#include -#include #include #include @@ -7,7 +5,6 @@ #include #include #include -#include #include #include /// pair @@ -16,6 +13,8 @@ #include "config.h" #include "config_tools.h" +#include +#include #include #include #include @@ -119,268 +118,6 @@ std::pair clickhouse_short_names[] = {"chc", "client"}, }; - -enum class InstructionFail : uint8_t -{ - NONE = 0, - SSE3 = 1, - SSSE3 = 2, - SSE4_1 = 3, - SSE4_2 = 4, - POPCNT = 5, - AVX = 6, - AVX2 = 7, - AVX512 = 8 -}; - -auto instructionFailToString(InstructionFail fail) -{ - switch (fail) - { -#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) - case InstructionFail::NONE: - ret("NONE"); - case InstructionFail::SSE3: - ret("SSE3"); - case InstructionFail::SSSE3: - ret("SSSE3"); - case InstructionFail::SSE4_1: - ret("SSE4.1"); - case InstructionFail::SSE4_2: - ret("SSE4.2"); - case InstructionFail::POPCNT: - ret("POPCNT"); - case InstructionFail::AVX: - ret("AVX"); - case InstructionFail::AVX2: - ret("AVX2"); - case InstructionFail::AVX512: - ret("AVX512"); -#undef ret - } -} - - -sigjmp_buf jmpbuf; - -[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) -{ - siglongjmp(jmpbuf, 1); -} - -/// Check if necessary SSE extensions are available by trying to execute some sse instructions. -/// If instruction is unavailable, SIGILL will be sent by kernel. -void checkRequiredInstructionsImpl(volatile InstructionFail & fail) -{ -#if defined(__SSE3__) - fail = InstructionFail::SSE3; - __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSSE3__) - fail = InstructionFail::SSSE3; - __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); - -#endif - -#if defined(__SSE4_1__) - fail = InstructionFail::SSE4_1; - __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSE4_2__) - fail = InstructionFail::SSE4_2; - __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); -#endif - - /// Defined by -msse4.2 -#if defined(__POPCNT__) - fail = InstructionFail::POPCNT; - { - uint64_t a = 0; - uint64_t b = 0; - __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); - } -#endif - -#if defined(__AVX__) - fail = InstructionFail::AVX; - __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX2__) - fail = InstructionFail::AVX2; - __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX512__) - fail = InstructionFail::AVX512; - __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); -#endif - - fail = InstructionFail::NONE; -} - -/// Macros to avoid using strlen(), since it may fail if SSE is not supported. 
-#define writeError(data) do \ - { \ - static_assert(__builtin_constant_p(data)); \ - if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ - _Exit(1); \ - } while (false) - -/// Check SSE and others instructions availability. Calls exit on fail. -/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. -void checkRequiredInstructions() -{ - struct sigaction sa{}; - struct sigaction sa_old{}; - sa.sa_sigaction = sigIllCheckHandler; - sa.sa_flags = SA_SIGINFO; - auto signal = SIGILL; - if (sigemptyset(&sa.sa_mask) != 0 - || sigaddset(&sa.sa_mask, signal) != 0 - || sigaction(signal, &sa, &sa_old) != 0) - { - /// You may wonder about strlen. - /// Typical implementation of strlen is using SSE4.2 or AVX2. - /// But this is not the case because it's compiler builtin and is executed at compile time. - - writeError("Can not set signal handler\n"); - _Exit(1); - } - - volatile InstructionFail fail = InstructionFail::NONE; - - if (sigsetjmp(jmpbuf, 1)) - { - writeError("Instruction check fail. The CPU does not support "); - if (!std::apply(writeRetry, instructionFailToString(fail))) - _Exit(1); - writeError(" instruction set.\n"); - _Exit(1); - } - - checkRequiredInstructionsImpl(fail); - - if (sigaction(signal, &sa_old, nullptr)) - { - writeError("Can not set signal handler\n"); - _Exit(1); - } -} - -struct Checker -{ - Checker() - { - checkRequiredInstructions(); - } -} checker -#ifndef OS_DARWIN - __attribute__((init_priority(101))) /// Run before other static initializers. -#endif -; - - -#if !defined(USE_MUSL) -/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. -void checkHarmfulEnvironmentVariables(char ** argv) -{ - std::initializer_list harmful_env_variables = { - /// The list is a selection from "man ld-linux". - "LD_PRELOAD", - "LD_LIBRARY_PATH", - "LD_ORIGIN_PATH", - "LD_AUDIT", - "LD_DYNAMIC_WEAK", - /// The list is a selection from "man dyld" (osx). - "DYLD_LIBRARY_PATH", - "DYLD_FALLBACK_LIBRARY_PATH", - "DYLD_VERSIONED_LIBRARY_PATH", - "DYLD_INSERT_LIBRARIES", - }; - - bool require_reexec = false; - for (const auto * var : harmful_env_variables) - { - if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) - { - /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful - if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently - { - fmt::print(stderr, "Cannot override {} environment variable", var); - _exit(1); - } - require_reexec = true; - } - } - - if (require_reexec) - { - /// Use execvp() over execv() to search in PATH. - /// - /// This should be safe, since: - /// - if argv[0] is relative path - it is OK - /// - if argv[0] has only basename, the it will search in PATH, like shell will do. - /// - /// Also note, that this (search in PATH) because there is no easy and - /// portable way to get absolute path of argv[0]. - /// - on linux there is /proc/self/exec and AT_EXECFN - /// - but on other OSes there is no such thing (especially on OSX). - /// - /// And since static linking will be done someday anyway, - /// let's not pollute the code base with special cases. - int error = execvp(argv[0], argv); - _exit(error); - } -} -#endif - - -#if defined(SANITIZE_COVERAGE) -__attribute__((no_sanitize("coverage"))) -void dumpCoverage() -{ - /// A user can request to dump the coverage information into files at exit. 
- /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, - /// that cannot introspect it with SQL functions at runtime. - - /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' - /// containing the list of addresses of covered . - - /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. - - if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) - { - auto dump = [](const std::string & name, auto span) - { - /// Write only non-zeros. - std::vector data; - data.reserve(span.size()); - for (auto addr : span) - if (addr) - data.push_back(addr); - - int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); - if (-1 == fd) - { - writeError("Cannot open a file to write the coverage data\n"); - } - else - { - if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) - writeError("Cannot write the coverage data to a file\n"); - if (0 != ::close(fd)) - writeError("Cannot close the file with coverage data\n"); - } - }; - - dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); - } -} -#endif - } bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) diff --git a/programs/odbc-bridge/ODBCSource.cpp b/programs/odbc-bridge/ODBCSource.cpp index 940970f36ab..41a9813ce50 100644 --- a/programs/odbc-bridge/ODBCSource.cpp +++ b/programs/odbc-bridge/ODBCSource.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -47,9 +48,17 @@ Chunk ODBCSource::generate() for (int idx = 0; idx < result.columns(); ++idx) { const auto & sample = description.sample_block.getByPosition(idx); - if (!result.is_null(idx)) - insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + { + if (columns[idx]->isNullable()) + { + ColumnNullable & column_nullable = assert_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), removeNullable(sample.type), description.types[idx].first, result, idx); + column_nullable.getNullMapData().emplace_back(0); + } + else + insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + } else insertDefaultValue(*columns[idx], *sample.column); } diff --git a/programs/self-extracting/CMakeLists.txt b/programs/self-extracting/CMakeLists.txt index 4b6dd07f618..32b686d40dd 100644 --- a/programs/self-extracting/CMakeLists.txt +++ b/programs/self-extracting/CMakeLists.txt @@ -10,9 +10,24 @@ else () set (COMPRESSOR "${PROJECT_BINARY_DIR}/utils/self-extracting-executable/compressor") endif () -add_custom_target (self-extracting ALL +add_custom_target (self-extracting-server ALL ${CMAKE_COMMAND} -E remove clickhouse clickhouse-stripped COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-stripped ../clickhouse-stripped DEPENDS clickhouse clickhouse-stripped compressor ) + +set(self_extracting_deps "self-extracting-server") + +if (BUILD_STANDALONE_KEEPER) + add_custom_target (self-extracting-keeper ALL + ${CMAKE_COMMAND} -E remove clickhouse-keeper + COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-keeper ../clickhouse-keeper + DEPENDS compressor clickhouse-keeper + ) + list(APPEND self_extracting_deps "self-extracting-keeper") +endif() + +add_custom_target (self-extracting ALL + DEPENDS ${self_extracting_deps} +) diff --git 
a/programs/server/Server.cpp b/programs/server/Server.cpp index 4cb3b5f45c7..564afec9cf4 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -133,10 +134,6 @@ # include #endif -#if USE_JEMALLOC -# include -#endif - #if USE_AZURE_BLOB_STORAGE # include # include @@ -176,34 +173,10 @@ namespace ProfileEvents namespace fs = std::filesystem; -#if USE_JEMALLOC -static bool jemallocOptionEnabled(const char *name) -{ - bool value; - size_t size = sizeof(value); - - if (mallctl(name, reinterpret_cast(&value), &size, /* newp= */ nullptr, /* newlen= */ 0)) - throw Poco::SystemException("mallctl() failed"); - - return value; -} -#else -static bool jemallocOptionEnabled(const char *) { return false; } -#endif - int mainEntryClickHouseServer(int argc, char ** argv) { DB::Server app; - if (jemallocOptionEnabled("opt.background_thread")) - { - LOG_ERROR(&app.logger(), - "jemalloc.background_thread was requested, " - "however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, " - "and also background_thread is not compatible with ClickHouse watchdog " - "(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)"); - } - /// Do not fork separate process from watchdog if we attached to terminal. /// Otherwise it breaks gdb usage. /// Can be overridden by environment variable (cannot use server config at this moment). @@ -653,9 +626,35 @@ static void initializeAzureSDKLogger( #endif } +#if defined(SANITIZER) +static std::vector getSanitizerNames() +{ + std::vector names; + +#if defined(ADDRESS_SANITIZER) + names.push_back("address"); +#endif +#if defined(THREAD_SANITIZER) + names.push_back("thread"); +#endif +#if defined(MEMORY_SANITIZER) + names.push_back("memory"); +#endif +#if defined(UNDEFINED_BEHAVIOR_SANITIZER) + names.push_back("undefined behavior"); +#endif + + return names; +} +#endif + int Server::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif + Stopwatch startup_watch; Poco::Logger * log = &logger(); @@ -739,7 +738,17 @@ try global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); #if defined(SANITIZER) - global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); + auto sanitizers = getSanitizerNames(); + + String log_message; + if (sanitizers.empty()) + log_message = "sanitizer"; + else if (sanitizers.size() == 1) + log_message = fmt::format("{} sanitizer", sanitizers.front()); + else + log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", ")); + + global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message)); #endif #if defined(SANITIZE_COVERAGE) || WITH_COVERAGE diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index b21d4b86314..45f988f7b1e 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -506,6 +506,14 @@ let user = 'default'; let password = ''; let add_http_cors_header = (location.protocol != 'file:'); +const current_url = new URL(window.location); +/// Substitute user name if it's specified in the query string +const user_from_url = current_url.searchParams.get('user'); +if (user_from_url) { + user = user_from_url; +} + + const errorCodeMessageMap = { 516: 'Error authenticating with database. Please check your connection params and try again.' 
} diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index e6798a792dd..098e2858abc 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -164,32 +165,15 @@ private: auto aggregate_function_clone = aggregate_function->clone(); auto & aggregate_function_clone_typed = aggregate_function_clone->as(); + aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; - resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name); + resolveAggregateFunctionNodeByName(aggregate_function_clone_typed, result_aggregate_function_name); arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone); - resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName()); + resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext()); return arithmetic_function_clone; } - - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) - { - auto function_aggregate_function = function_node.getAggregateFunction(); - - AggregateFunctionProperties properties; - auto action = NullsAction::EMPTY; - auto aggregate_function = AggregateFunctionFactory::instance().get( - aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index ebefc12ae53..180470952cd 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -18,19 +19,18 @@ namespace DB namespace { -class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor +class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitorWithContext { public: - explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_) - : context(std::move(context_)) - {} + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child) { return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION; } - void visitImpl(QueryTreeNodePtr & node) const + void enterImpl(QueryTreeNodePtr & node) const { auto * function_node = node->as(); if (!function_node) @@ -171,13 +171,13 @@ private: { auto result_function = std::make_shared("and"); result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions); - resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*result_function, 
result_function->getFunctionName(), getContext()); if (comparison_function_name == "notEquals") { auto not_function = std::make_shared("not"); not_function->getArguments().getNodes().push_back(std::move(result_function)); - resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), getContext()); result_function = std::move(not_function); } @@ -197,18 +197,10 @@ private: comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument)); comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument)); - resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), getContext()); return comparison_function; } - - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - ContextPtr context; }; } diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 3307c440f42..23fde89a348 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -77,11 +78,9 @@ public: /// Replace `countDistinct` in the initial query with `count` auto result_type = function_node->getResultType(); - AggregateFunctionProperties properties; - auto action = NullsAction::EMPTY; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count"); } }; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 15ac8d642a4..90051779a26 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -16,6 +17,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -23,202 +27,410 @@ namespace DB namespace { -class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext +struct ColumnContext +{ + NameAndTypePair column; + QueryTreeNodePtr column_source; + ContextPtr context; +}; + +using NodeToSubcolumnTransformer = std::function; + +void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) +{ + /// Replace `length(argument)` with `argument.size0` + /// `argument` may be Array or Map. + + NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; + node = std::make_shared(column, ctx.column_source); +} + +template +void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) +{ + /// Replace `empty(argument)` with `equals(argument.size0, 0)` when `positive` is true, + /// and `notEmpty(argument)` with `notEquals(argument.size0, 0)` otherwise. + /// `argument` may be Array or Map.
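+ /// [Editor's note: an illustrative sketch, not part of the original patch. Based on the
+ /// transformer map below, both variants are expected to be instantiated roughly as
+ /// follows, with the `positive` template parameter selecting the comparison function:]
+ ///
+ ///     optimizeFunctionEmpty<true>(node, function_node, ctx);   /// empty(arg)    -> equals(arg.size0, 0)
+ ///     optimizeFunctionEmpty<false>(node, function_node, ctx);  /// notEmpty(arg) -> notEquals(arg.size0, 0)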
+ + NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + function_arguments_nodes.clear(); + function_arguments_nodes.push_back(std::make_shared(column, ctx.column_source)); + function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + + const auto * function_name = positive ? "equals" : "notEquals"; + resolveOrdinaryFunctionNodeByName(function_node, function_name, ctx.context); +} + +String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) +{ + if (value.getType() == Field::Types::String) + return value.get(); + + if (value.getType() == Field::Types::UInt64) + return data_type_tuple.getNameByPosition(value.get()); + + return ""; +} + +String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) +{ + if (value.getType() == Field::Types::String) + return value.get(); + + return ""; +} + +template +void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) +{ + /// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`. + /// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`. + + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + if (function_arguments_nodes.size() != 2) + return; + + const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); + if (!second_argument_constant_node) + return; + + const auto & data_type_concrete = assert_cast(*ctx.column.type); + auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete); + + if (subcolumn_name.empty()) + return; + + NameAndTypePair column{ctx.column.name + "." 
+ subcolumn_name, function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); +} + +std::map, NodeToSubcolumnTransformer> node_transformers = +{ + { + {TypeIndex::Array, "length"}, optimizeFunctionLength, + }, + { + {TypeIndex::Array, "empty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Array, "notEmpty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "length"}, optimizeFunctionLength, + }, + { + {TypeIndex::Map, "empty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "notEmpty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "mapKeys"}, + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) + { + /// Replace `mapKeys(map_argument)` with `map_argument.keys` + NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); + }, + }, + { + {TypeIndex::Map, "mapValues"}, + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) + { + /// Replace `mapValues(map_argument)` with `map_argument.values` + NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); + }, + }, + { + {TypeIndex::Map, "mapContains"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) + { + /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` + const auto & data_type_map = assert_cast(*ctx.column.type); + + NameAndTypePair column{ctx.column.name + ".keys", std::make_shared(data_type_map.getKeyType())}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + auto has_function_argument = std::make_shared(column, ctx.column_source); + function_arguments_nodes[0] = std::move(has_function_argument); + + resolveOrdinaryFunctionNodeByName(function_node, "has", ctx.context); + }, + }, + { + {TypeIndex::Nullable, "count"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) + { + /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + auto new_column_node = std::make_shared(column, ctx.column_source); + auto function_node_not = std::make_shared("not"); + + function_node_not->getArguments().getNodes().push_back(std::move(new_column_node)); + resolveOrdinaryFunctionNodeByName(*function_node_not, "not", ctx.context); + + function_arguments_nodes = {std::move(function_node_not)}; + resolveAggregateFunctionNodeByName(function_node, "sum"); + }, + }, + { + {TypeIndex::Nullable, "isNull"}, + [](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) + { + /// Replace `isNull(nullable_argument)` with `nullable_argument.null` + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + node = std::make_shared(column, ctx.column_source); + }, + }, + { + {TypeIndex::Nullable, "isNotNull"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) + { + /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + function_arguments_nodes = {std::make_shared(column, ctx.column_source)}; + resolveOrdinaryFunctionNodeByName(function_node, "not", ctx.context); + }, + }, + { + {TypeIndex::Tuple, "tupleElement"}, 
optimizeTupleOrVariantElement, + }, + { + {TypeIndex::Variant, "variantElement"}, optimizeTupleOrVariantElement, + }, +}; + +std::tuple getTypedNodesForOptimization(const QueryTreeNodePtr & node) +{ + auto * function_node = node->as(); + if (!function_node) + return {}; + + auto & function_arguments_nodes = function_node->getArguments().getNodes(); + if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2) + return {}; + + auto * first_argument_column_node = function_arguments_nodes.front()->as(); + if (!first_argument_column_node || first_argument_column_node->getColumnName() == "__grouping_set") + return {}; + + auto column_source = first_argument_column_node->getColumnSource(); + auto * table_node = column_source->as(); + if (!table_node) + return {}; + + const auto & storage = table_node->getStorage(); + const auto & storage_snapshot = table_node->getStorageSnapshot(); + auto column = first_argument_column_node->getColumn(); + + if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata)) + return {}; + + auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name); + if (!column_in_table || !column_in_table->type->equals(*column.type)) + return {}; + + return std::make_tuple(function_node, first_argument_column_node, table_node); +} + +/// First pass collects info about identifiers to determine which identifiers are allowed to be optimized. +class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext { public: - using Base = InDepthQueryTreeVisitorWithContext; + using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; + void enterImpl(const QueryTreeNodePtr & node) + { + if (!getSettings().optimize_functions_to_subcolumns) + return; + + if (auto * table_node = node->as()) + { + enterImpl(*table_node); + return; + } + + if (auto * column_node = node->as()) + { + enterImpl(*column_node); + return; + } + + auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node); + if (function_node && first_argument_node && table_node) + { + enterImpl(*function_node, *first_argument_node, *table_node); + return; + } + + if (const auto * join_node = node->as()) + { + can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().join_use_nulls; + return; + } + + if (const auto * query_node = node->as()) + { + if (query_node->isGroupByWithCube() || query_node->isGroupByWithRollup() || query_node->isGroupByWithGroupingSets()) + can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().group_by_use_nulls; + return; + } + } + + std::unordered_set getIdentifiersToOptimize() const + { + if (can_wrap_result_columns_with_nullable) + { + /// Do not optimize if we have JOIN with setting join_use_nulls. + /// Do not optimize if we have GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with setting group_by_use_nulls. + /// It may change the behaviour if a subcolumn can be converted + /// to Nullable while the original column cannot (e.g. for Array type). + return {}; + } + + /// Do not optimize if the full column is requested in another context. + /// It doesn't make sense because it doesn't reduce the amount of read data + /// and the optimized functions are not computation heavy. But introducing + /// a new identifier complicates query analysis and may break it. + /// + /// E.g.
query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to the incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// which will fail with an error: `n.null` is not under aggregate function and not in GROUP BY keys. + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. + + std::unordered_set identifiers_to_optimize; + for (const auto & [identifier, count] : optimized_identifiers_count) + { + if (all_key_columns.contains(identifier)) + continue; + + auto it = identifiers_count.find(identifier); + if (it != identifiers_count.end() && it->second == count) + identifiers_to_optimize.insert(identifier); + } + + return identifiers_to_optimize; + } + +private: + std::unordered_set all_key_columns; + std::unordered_map identifiers_count; + std::unordered_map optimized_identifiers_count; + + NameSet processed_tables; + bool can_wrap_result_columns_with_nullable = false; + + void enterImpl(const TableNode & table_node) + { + auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + if (!processed_tables.emplace(table_name).second) + return; + + auto add_key_columns = [&](const auto & key_columns) + { + for (const auto & column_name : key_columns) + { + Identifier identifier({table_name, column_name}); + all_key_columns.insert(identifier); + } + }; + + const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata; + const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); + const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + + add_key_columns(primary_key_columns); + add_key_columns(partition_key_columns); + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + const auto & index_columns = index.expression->getRequiredColumns(); + add_key_columns(index_columns); + } + } + + void enterImpl(const ColumnNode & column_node) + { + if (column_node.getColumnName() == "__grouping_set") + return; + + auto column_source = column_node.getColumnSource(); + auto * table_node = column_source->as(); + if (!table_node) + return; + + auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column_node.getColumnName()}); + + ++identifiers_count[qualified_name]; + } + + void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) + { + /// For queries with FINAL, converting a function to a subcolumn may interfere with + /// special merging algorithms and produce a wrong query result. + if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) + return; + + const auto & column = first_argument_column_node.getColumn(); + auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); + + if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()})) + ++optimized_identifiers_count[qualified_name]; + } +}; + +/// Second pass optimizes functions to subcolumns for allowed identifiers.
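+/// [Editor's note: illustrative sketch, not part of the original patch. The two visitors
+/// are combined by FunctionToSubcolumnsPass::run() further below in this patch, roughly:]
+///
+///     FunctionToSubcolumnsVisitorFirstPass first_visitor(context);
+///     first_visitor.visit(query_tree_node);
+///     auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize();
+///     if (!identifiers_to_optimize.empty())
+///     {
+///         FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize));
+///         second_visitor.visit(query_tree_node);
+///     }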
+class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext +{ +private: + std::unordered_set identifiers_to_optimize; + +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + FunctionToSubcolumnsVisitorSecondPass(ContextPtr context_, std::unordered_set identifiers_to_optimize_) + : Base(std::move(context_)), identifiers_to_optimize(std::move(identifiers_to_optimize_)) + { + } + void enterImpl(QueryTreeNodePtr & node) const { if (!getSettings().optimize_functions_to_subcolumns) return; - auto * function_node = node->as(); - if (!function_node) - return; - - auto & function_arguments_nodes = function_node->getArguments().getNodes(); - size_t function_arguments_nodes_size = function_arguments_nodes.size(); - - if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2) - return; - - auto * first_argument_column_node = function_arguments_nodes.front()->as(); - - if (!first_argument_column_node) - return; - - if (first_argument_column_node->getColumnName() == "__grouping_set") - return; - - auto column_source = first_argument_column_node->getColumnSource(); - auto * table_node = column_source->as(); - - if (!table_node) - return; - - const auto & storage = table_node->getStorage(); - if (!storage->supportsSubcolumns()) + auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node); + if (!function_node || !first_argument_column_node || !table_node) return; auto column = first_argument_column_node->getColumn(); - WhichDataType column_type(column.type); + auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); - const auto & function_name = function_node->getFunctionName(); + Identifier qualified_name({table_name, column.name}); + if (!identifiers_to_optimize.contains(qualified_name)) + return; - if (function_arguments_nodes_size == 1) + auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()}); + if (transformer_it != node_transformers.end()) { - if (column_type.isArray()) - { - if (function_name == "length") - { - /// Replace `length(array_argument)` with `array_argument.size0` - column.name += ".size0"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "empty") - { - /// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)` - column.name += ".size0"; - column.type = std::make_shared(); - - function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_source)); - function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - - resolveOrdinaryFunctionNode(*function_node, "equals"); - } - else if (function_name == "notEmpty") - { - /// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)` - column.name += ".size0"; - column.type = std::make_shared(); - - function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_source)); - function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - - resolveOrdinaryFunctionNode(*function_node, "notEquals"); - } - } - else if (column_type.isNullable()) - { - if (function_name == "isNull") - { - /// Replace `isNull(nullable_argument)` with `nullable_argument.null` - column.name += ".null"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "isNotNull") - { - /// Replace `isNotNull(nullable_argument)` with 
`not(nullable_argument.null)` - column.name += ".null"; - column.type = std::make_shared(); - - function_arguments_nodes = {std::make_shared(column, column_source)}; - - resolveOrdinaryFunctionNode(*function_node, "not"); - } - } - else if (column_type.isMap()) - { - if (function_name == "mapKeys") - { - /// Replace `mapKeys(map_argument)` with `map_argument.keys` - column.name += ".keys"; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapValues") - { - /// Replace `mapValues(map_argument)` with `map_argument.values` - column.name += ".values"; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - } + ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), getContext()}; + transformer_it->second(node, *function_node, ctx); } - else - { - const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) - { - /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` - * with `tuple_argument.column_name`. - */ - const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); - const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); - - const auto & data_type_tuple = assert_cast(*column.type); - - String subcolumn_name; - - if (tuple_element_constant_value_type == Field::Types::String) - { - subcolumn_name = tuple_element_constant_value.get(); - } - else if (tuple_element_constant_value_type == Field::Types::UInt64) - { - auto tuple_column_index = tuple_element_constant_value.get(); - subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index); - } - else - { - return; - } - - column.name += '.'; - column.name += subcolumn_name; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node) - { - /// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`. 
- const auto & variant_element_constant_value = second_argument_constant_node->getValue(); - String subcolumn_name; - - if (variant_element_constant_value.getType() != Field::Types::String) - return; - - subcolumn_name = variant_element_constant_value.get(); - - column.name += '.'; - column.name += subcolumn_name; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapContains" && column_type.isMap()) - { - const auto & data_type_map = assert_cast(*column.type); - - /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` - column.name += ".keys"; - column.type = std::make_shared(data_type_map.getKeyType()); - - auto has_function_argument = std::make_shared(column, column_source); - function_arguments_nodes[0] = std::move(has_function_argument); - - resolveOrdinaryFunctionNode(*function_node, "has"); - } - } - } - -private: - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } }; @@ -226,8 +438,15 @@ private: void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { - FunctionToSubcolumnsVisitor visitor(context); - visitor.visit(query_tree_node); + FunctionToSubcolumnsVisitorFirstPass first_visitor(context); + first_visitor.visit(query_tree_node); + auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize(); + + if (identifiers_to_optimize.empty()) + return; + + FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); + second_visitor.visit(query_tree_node); } } diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index e70e08e65f4..f9066b0363c 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -47,25 +48,17 @@ public: if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) { - resolveAsCountAggregateFunction(*function_node); function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count"); } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && first_argument_constant_literal.get() == 1) { - resolveAsCountAggregateFunction(*function_node); function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count"); } } -private: - static void resolveAsCountAggregateFunction(FunctionNode & function_node) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index 0c37749c706..a8364f1ab7a 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include 
#include diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 3500d8327ac..05377f8514e 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -74,8 +74,7 @@ public: new_arguments[1] = std::move(if_arguments_nodes[0]); function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); } } else if (first_const_node) @@ -104,27 +103,10 @@ public: new_arguments[1] = std::move(not_function); function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); } } } - -private: - static void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) - { - auto result_type = function_node.getResultType(); - - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - function_node.getFunctionName() + "If", - function_node.getNullsAction(), - argument_types, - function_node.getAggregateFunction()->getParameters(), - properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 5646d26f7f6..524ee807721 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -73,23 +73,24 @@ public: const auto lhs = std::make_shared("sum"); lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAsAggregateFunctionNode(*lhs, column_type); + resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName()); const auto rhs_count = std::make_shared("count"); rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAsAggregateFunctionNode(*rhs_count, column_type); + resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName()); const auto rhs = std::make_shared("multiply"); rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); rhs->getArguments().getNodes().push_back(rhs_count); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); auto new_node = std::make_shared(Poco::toLower(func_plus_minus_node->getFunctionName())); if (column_id == 0) new_node->getArguments().getNodes() = {lhs, rhs}; else if (column_id == 1) new_node->getArguments().getNodes() = {rhs, lhs}; - resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName()); + + resolveOrdinaryFunctionNodeByName(*new_node, new_node->getFunctionName(), getContext()); if (!new_node) return; @@ -100,28 +101,7 @@ public: res = createCastFunction(res, function_node->getResultType(), getContext()); node = std::move(res); - } - -private: - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - const auto function = FunctionFactory::instance().get(function_name, 
getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - static void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) - { - AggregateFunctionProperties properties; - const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(), - NullsAction::EMPTY, - {argument_type}, - {}, - properties); - - function_node.resolveAsAggregateFunction(aggregate_function); - } - }; } diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 852cbe75c4a..f52d724f346 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -5,6 +5,7 @@ #include #include +#include #include @@ -65,7 +66,8 @@ public: auto multiplier_node = function_node_arguments_nodes[0]; function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]); function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (constant_value_literal.get() != 1) { @@ -115,7 +117,7 @@ public: function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (if_true_condition_value != 1) { @@ -144,7 +146,7 @@ public: function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (if_false_condition_value != 1) { @@ -156,15 +158,6 @@ public: } private: - static void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - "countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } - QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) { auto multiply_function_node = std::make_shared("multiply"); diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 91186db0e0c..947952ac3a8 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace DB diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index b801865c9a5..83705f54b38 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -184,11 +185,8 @@ public: /// Replace `uniq` in the initial query with `count` if (match_subquery_with_distinct() || match_subquery_with_group_by()) { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); - function_node->getArguments().getNodes().clear(); -
function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + resolveAggregateFunctionNodeByName(*function_node, "count"); } } }; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 3c3489681f6..cf4a3f77e34 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -636,16 +636,16 @@ private: bool has_function = false; }; -inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_node) +inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode & function_node, const String & function_name) { Array parameters; - for (const auto & param : function_node->getParameters()) + for (const auto & param : function_node.getParameters()) { auto * constant = param->as(); parameters.push_back(constant->getValue()); } - const auto & function_node_argument_nodes = function_node->getArguments().getNodes(); + const auto & function_node_argument_nodes = function_node.getArguments().getNodes(); DataTypes argument_types; argument_types.reserve(function_node_argument_nodes.size()); @@ -655,7 +655,7 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod AggregateFunctionProperties properties; auto action = NullsAction::EMPTY; - return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties); + return AggregateFunctionFactory::instance().get(function_name, action, argument_types, parameters, properties); } } @@ -736,11 +736,11 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context) { if (name == "nothing" || name == "nothingUInt64" || name == "nothingNull") return; - function_node->resolveAsAggregateFunction(resolveAggregateFunction(function_node)); + function_node->resolveAsAggregateFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName())); } else if (function_node->isWindowFunction()) { - function_node->resolveAsWindowFunction(resolveAggregateFunction(function_node)); + function_node->resolveAsWindowFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName())); } } @@ -793,6 +793,18 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty return function_node; } +void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context) +{ + auto function = FunctionFactory::instance().get(function_name, context); + function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); +} + +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name) +{ + auto aggregate_function = resolveAggregateFunction(function_node, function_name); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); +} + /** Returns: * {_, false} - multiple sources * {nullptr, true} - no sources (for constants) diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index f64b724abeb..f2e2c500384 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -112,6 +112,14 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); /// Wrap node into `_CAST` function QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context); +/// Resolves function node as ordinary function with given name. +/// Arguments and parameters are taken from the node. 
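+/// [Editor's note: illustrative usage sketch, not part of the original patch; `arg_node`,
+/// `zero_node` and `context` are hypothetical names. It mirrors how the analyzer passes
+/// above rebuild a node's arguments first and then re-resolve it under the new name:]
+///
+///     function_node.getArguments().getNodes() = {arg_node, zero_node};
+///     resolveOrdinaryFunctionNodeByName(function_node, "notEquals", context);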
+void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context); + +/// Resolves function node as aggregate function with given name. +/// Arguments and parameters are taken from the node. +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name); + /// Checks that node has only one source and returns it QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node); diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 44e7808babc..cee41861d70 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -29,48 +29,49 @@ namespace ErrorCodes } BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = configuration.createClient(/* is_readonly */false, /* attempt_to_create_container */true); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); - object_storage = std::make_unique("BackupReaderAzureBlobStorage", - std::move(client_ptr), - configuration.createSettings(context_), - configuration_.container, - configuration.getConnectionURL().toString()); + object_storage = std::make_unique( + "BackupReaderAzureBlobStorage", + std::move(client_ptr), + std::move(settings_ptr), + connection_params.getContainer(), + connection_params.getConnectionURL()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; bool BackupReaderAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); return object_metadata.size_bytes; } std::unique_ptr BackupReaderAzureBlobStorage::readFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return 
std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -85,23 +86,23 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, && destination_data_source_description.is_encrypted == encrypted_in_backup) { LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); - auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional &) -> size_t + auto write_blob_function = [&](const Strings & dst_blob_path, WriteMode mode, const std::optional &) -> size_t { /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files. - if (blob_path.size() != 2 || mode != WriteMode::Rewrite) + if (dst_blob_path.size() != 2 || mode != WriteMode::Rewrite) throw Exception(ErrorCodes::LOGICAL_ERROR, "Blob writing function called with unexpected blob_path.size={} or mode={}", - blob_path.size(), mode); + dst_blob_path.size(), mode); copyAzureBlobStorageFile( client, destination_disk->getObjectStorage()->getAzureBlobStorageClient(), - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.getContainer(), + fs::path(blob_path) / path_in_backup, 0, file_size, - /* dest_container */ blob_path[1], - /* dest_path */ blob_path[0], + /* dest_container */ dst_blob_path[1], + /* dest_path */ dst_blob_path[0], settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupRDAzure")); @@ -119,28 +120,33 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = configuration.createClient(/* is_readonly */false, attempt_to_create_container); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); + if (!attempt_to_create_container) + connection_params.endpoint.container_already_exists = true; + + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); + + object_storage = std::make_unique( + "BackupWriterAzureBlobStorage", + std::move(client_ptr), + std::move(settings_ptr), + connection_params.getContainer(), + connection_params.getConnectionURL()); - object_storage = std::make_unique("BackupWriterAzureBlobStorage", - std::move(client_ptr), - configuration.createSettings(context_), - configuration.container, - configuration_.getConnectionURL().toString()); client = 
object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } void BackupWriterAzureBlobStorage::copyFileFromDisk( @@ -159,18 +165,18 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk( { /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container. /// In this case we can't use the native copy. - if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) + if (auto src_blob_path = src_disk->getBlobPath(src_path); src_blob_path.size() == 2) { LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorage", src_path, src_disk->getName()); copyAzureBlobStorageFile( src_disk->getObjectStorage()->getAzureBlobStorageClient(), client, - /* src_container */ blob_path[1], - /* src_path */ blob_path[0], + /* src_container */ src_blob_path[1], + /* src_path */ src_blob_path[0], start_pos, length, - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.getContainer(), + fs::path(blob_path) / path_in_backup, settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); @@ -188,11 +194,11 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St copyAzureBlobStorageFile( client, client, - configuration.container, - fs::path(configuration.blob_path)/ source, + connection_params.getContainer(), + fs::path(blob_path) / source, 0, size, - /* dest_container */ configuration.container, + /* dest_container */ connection_params.getContainer(), /* dest_path */ destination, settings, read_settings, @@ -206,22 +212,28 @@ void BackupWriterAzureBlobStorage::copyDataToFile( UInt64 length) { copyDataToAzureBlobStorageFile( - create_read_buffer, start_pos, length, client, configuration.container, - fs::path(configuration.blob_path) / path_in_backup, settings, - threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); + create_read_buffer, + start_pos, + length, + client, + connection_params.getContainer(), + fs::path(blob_path) / path_in_backup, + settings, + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), + "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; bool BackupWriterAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; RelativePathsWithMetadata children; object_storage->listObjects(key, children, /*max_keys*/ 0); if (children.empty()) @@ -231,7 +243,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -239,7 +251,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String std::unique_ptr
BackupWriterAzureBlobStorage::writeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, @@ -251,7 +263,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin void BackupWriterAzureBlobStorage::removeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; StoredObject object(key); object_storage->removeObjectIfExists(object); } @@ -260,7 +272,7 @@ void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); @@ -270,7 +282,7 @@ void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); } diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 61688107839..c3b88f245ab 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -1,12 +1,10 @@ #pragma once - #include "config.h" #if USE_AZURE_BLOB_STORAGE #include #include -#include -#include +#include namespace DB @@ -17,7 +15,8 @@ class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: BackupReaderAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, @@ -40,16 +39,18 @@ public: private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureConfiguration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: BackupWriterAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, @@ -87,9 +88,10 @@ private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureConfiguration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; } diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 03d156d1009..626df99b00c 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -5,6 +5,7 @@ #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -49,7 +50,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; - StorageAzureConfiguration 
configuration; + String blob_path; + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(params.context->getSettingsRef()); if (!id_arg.empty()) { @@ -59,55 +62,42 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (!config.has(config_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); - if (config.has(config_prefix + ".connection_string")) + connection_params = { - configuration.connection_url = config.getString(config_prefix + ".connection_string"); - configuration.is_connection_string = true; - configuration.container = config.getString(config_prefix + ".container"); - } - else - { - configuration.connection_url = config.getString(config_prefix + ".storage_account_url"); - configuration.is_connection_string = false; - configuration.container = config.getString(config_prefix + ".container"); - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - - if (config.has(config_prefix + ".account_name") && config.has(config_prefix + ".account_key")) - { - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - } - } + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true), + }; if (args.size() > 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); if (args.size() == 1) - configuration.setPath(args[0].safeGet()); - + blob_path = args[0].safeGet(); } else { if (args.size() == 3) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = !configuration.connection_url.starts_with("http"); + auto connection_url = args[0].safeGet(); + auto container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else if (args.size() == 5) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = false; + connection_params.endpoint.storage_account_url = args[0].safeGet(); + connection_params.endpoint.container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); - configuration.account_name = args[3].safeGet(); - configuration.account_key = args[4].safeGet(); + auto account_name = args[3].safeGet(); + auto account_key = args[4].safeGet(); + connection_params.auth_method = std::make_shared(account_name, account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else { @@ -117,16 +107,12 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } BackupImpl::ArchiveParams archive_params; - if (hasRegisteredArchiveFileExtension(configuration.getPath())) + if (hasRegisteredArchiveFileExtension(blob_path)) { if 
(params.is_internal_backup) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); - auto path = configuration.getPath(); - auto filename = removeFileNameFromURL(path); - configuration.setPath(path); - - archive_params.archive_name = filename; + archive_params.archive_name = removeFileNameFromURL(blob_path); archive_params.compression_method = params.compression_method; archive_params.compression_level = params.compression_level; archive_params.password = params.password; @@ -141,7 +127,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { auto reader = std::make_shared( - configuration, + connection_params, + blob_path, params.allow_azure_native_copy, params.read_settings, params.write_settings, @@ -159,7 +146,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) else { auto writer = std::make_shared( - configuration, + connection_params, + blob_path, params.allow_azure_native_copy, params.read_settings, params.write_settings, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b18207e55ad..d985595154c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -607,6 +607,10 @@ if (TARGET ch_contrib::usearch) dbms_target_link_libraries(PUBLIC ch_contrib::usearch) endif() +if (TARGET ch_contrib::prometheus_protobufs) + dbms_target_link_libraries (PUBLIC ch_contrib::prometheus_protobufs) +endif() + if (TARGET ch_rust::skim) dbms_target_include_directories(PRIVATE $) dbms_target_link_libraries(PUBLIC ch_rust::skim) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 56573c15f32..5d472ba99b9 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1206,11 +1206,8 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b if (local_format_error) std::rethrow_exception(local_format_error); - if (cancelled && is_interactive) - { + if (cancelled && is_interactive && !cancelled_printed.exchange(true)) output_stream << "Query was cancelled." << std::endl; - cancelled_printed = true; - } } @@ -1326,7 +1323,7 @@ void ClientBase::onEndOfStream() if (is_interactive) { - if (cancelled && !cancelled_printed) + if (cancelled && !cancelled_printed.exchange(true)) output_stream << "Query was cancelled." << std::endl; else if (!written_first_block) output_stream << "Ok." << std::endl; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 756400137ad..a260fd5761e 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -6,13 +6,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include @@ -338,8 +338,8 @@ protected: bool allow_repeated_settings = false; bool allow_merge_tree_settings = false; - bool cancelled = false; - bool cancelled_printed = false; + std::atomic_bool cancelled = false; + std::atomic_bool cancelled_printed = false; /// Unpacked descriptors and streams for the ease of use. int in_fd = STDIN_FILENO; diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 8c993f906e0..51cbe6f3d6f 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -195,6 +195,12 @@ void HedgedConnections::sendQuery( modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; } + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. 
+ /// Ensure the analyzer setting is explicitly set, so it is effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer is enabled for + /// all servers involved in the distributed query processing. + modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout); diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 5d0fc8fd39e..99bdd706d8b 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -150,6 +150,12 @@ void MultiplexedConnections::sendQuery( } } + /// FIXME: Remove this once `allow_experimental_analyzer` becomes an obsolete setting. + /// Ensure the analyzer setting is explicitly set, so it is effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer is enabled for + /// all servers involved in the distributed query processing. + modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; size_t num_replicas = replica_states.size(); diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 8a4792f0a5a..d36c7fd08aa 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -12,7 +12,9 @@ #include #include +#include #include +#include #include #include "config.h" @@ -22,24 +24,169 @@ #define STRINGIFY(x) STRINGIFY_HELPER(x) #endif +using namespace DB; namespace DB { namespace ErrorCodes { - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_OPEN_FILE; - extern const int FILE_DOESNT_EXIST; - extern const int INCORRECT_DATA; +extern const int FILE_DOESNT_EXIST; +extern const int INCORRECT_DATA; } -CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) - : log(getLogger("CgroupsMemoryUsageObserver")) - , wait_time(wait_time_) - , memory_usage_file(log) +} + +namespace { - LOG_INFO(log, "Initialized cgroups memory limit observer, wait time is {} sec", wait_time.count()); + +/// Format is +/// kernel 5 +/// rss 15 +/// [...]
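Review note: the explicit `set()` call matters because, as a rule, only settings marked as changed are serialized to remote servers; re-setting `allow_experimental_analyzer` to its current value forces it onto the wire so the initiator's choice wins over the remote server's compiled-in default. A toy model of that mechanism (an assumed simplification, not the real `Settings` class):

```cpp
#include <iostream>
#include <map>
#include <string>

/// Toy "changed settings" model: only entries in `changed` get serialized
/// and sent to the remote side, mirroring the behavior assumed above.
struct ToySettings
{
    bool allow_experimental_analyzer = true;     /// compiled-in default
    std::map<std::string, std::string> changed;  /// what goes on the wire

    void set(const std::string & name, bool value)
    {
        allow_experimental_analyzer = value;
        changed[name] = value ? "1" : "0";  /// now always serialized
    }
};

int main()
{
    ToySettings modified_settings;
    /// Without this call the setting stays "unchanged" and the remote
    /// server would fall back to ITS OWN default, which may differ.
    modified_settings.set("allow_experimental_analyzer",
                          modified_settings.allow_experimental_analyzer);
    std::cout << "settings sent: " << modified_settings.changed.size() << '\n';
}
```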
+uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +{ + while (!buf.eof()) + { + std::string current_key; + readStringUntilWhitespace(current_key, buf); + if (current_key != key) + { + std::string dummy; + readStringUntilNewlineInto(dummy, buf); + buf.ignore(); + continue; + } + + assertChar(' ', buf); + uint64_t value = 0; + readIntText(value, buf); + return value; + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName()); +} + +struct CgroupsV1Reader : ICgroupsReader +{ + explicit CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + buf.rewind(); + return readMetricFromStatFile(buf, "rss"); + } + +private: + std::mutex mutex; + ReadBufferFromFile buf TSA_GUARDED_BY(mutex); +}; + +struct CgroupsV2Reader : ICgroupsReader +{ + explicit CgroupsV2Reader(const std::filesystem::path & stat_file_dir) + : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") + { + } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + current_buf.rewind(); + stat_buf.rewind(); + + int64_t mem_usage = 0; + /// memory.current contains a single number + /// the reason why we subtract it is described here: https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667 + readIntText(mem_usage, current_buf); + mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file"); + chassert(mem_usage >= 0, "Negative memory usage"); + return mem_usage; + } + +private: + std::mutex mutex; + ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex); + ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex); +}; + +/// Caveats: +/// - All of the logic in this file assumes that the current process is the only process in the +/// containing cgroup (or more precisely: the only process with significant memory consumption). +/// If this is not the case, then other processes' memory consumption may affect the internal +/// memory tracker ... +/// - Cgroups v1 and v2 allow nested cgroup hierarchies. As v1 has been deprecated for over half a +/// decade and will go away at some point, hierarchical detection is only implemented for v2. +/// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such +/// systems existed only for a short transition period. + +std::optional getCgroupsV2Path() +{ + if (!cgroupsV2Enabled()) + return {}; + + if (!cgroupsV2MemoryControllerEnabled()) + return {}; + + String cgroup = cgroupV2OfProcess(); + auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); + + /// Return the bottom-most nested current memory file. If there is no such file at the current + /// level, try again at the parent level as memory settings are inherited.
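Review note: `readMetricFromStatFile` scans the `key value` lines of `memory.stat`, skipping lines whose key does not match. A minimal sketch of the same parsing logic over an in-memory string (toy code using iostreams, not the ClickHouse `ReadBuffer` API):

```cpp
#include <cstdint>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

/// Parse a "key value" per-line file such as a cgroup memory.stat.
uint64_t readMetric(const std::string & contents, const std::string & key)
{
    std::istringstream in(contents);
    std::string current_key;
    uint64_t value;
    while (in >> current_key >> value)
        if (current_key == key)
            return value;
    throw std::runtime_error("Cannot find '" + key + "'");
}

int main()
{
    const std::string stat = "kernel 5\nrss 15\ninactive_file 3\n";
    std::cout << readMetric(stat, "rss") << '\n';            /// 15
    std::cout << readMetric(stat, "inactive_file") << '\n';  /// 3
}
```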
+ while (current_cgroup != default_cgroups_mount.parent_path()) + { + const auto current_path = current_cgroup / "memory.current"; + const auto stat_path = current_cgroup / "memory.stat"; + if (std::filesystem::exists(current_path) && std::filesystem::exists(stat_path)) + return {current_cgroup}; + current_cgroup = current_cgroup.parent_path(); + } + return {}; +} + +std::optional getCgroupsV1Path() +{ + auto path = default_cgroups_mount / "memory/memory.stat"; + if (!std::filesystem::exists(path)) + return {}; + return {default_cgroups_mount / "memory"}; +} + +std::pair getCgroupsPath() +{ + auto v2_path = getCgroupsV2Path(); + if (v2_path.has_value()) + return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; + + auto v1_path = getCgroupsV1Path(); + if (v1_path.has_value()) + return {*v1_path, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; + + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); +} + +} + +namespace DB +{ + +CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) + : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_) +{ + const auto [cgroup_path, version] = getCgroupsPath(); + + if (version == CgroupsVersion::V2) + cgroup_reader = std::make_unique(cgroup_path); + else + cgroup_reader = std::make_unique(cgroup_path); + + LOG_INFO( + log, + "Will read the current memory usage from '{}' (cgroups version: {}), wait time is {} sec", + cgroup_path, + (version == CgroupsVersion::V1) ? "v1" : "v2", + wait_time.count()); } CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() @@ -79,12 +226,13 @@ void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint { LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_)); -#if USE_JEMALLOC +# if USE_JEMALLOC LOG_INFO(log, "Purging jemalloc arenas"); mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); -#endif +# endif /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); MemoryTracker::setRSS(memory_usage, 0); LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); @@ -104,152 +252,6 @@ void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmo on_memory_amount_available_changed = on_memory_amount_available_changed_; } -namespace -{ - -/// Caveats: -/// - All of the logic in this file assumes that the current process is the only process in the -/// containing cgroup (or more precisely: the only process with significant memory consumption). -/// If this is not the case, then other processe's memory consumption may affect the internal -/// memory tracker ... -/// - Cgroups v1 and v2 allow nested cgroup hierarchies. As v1 is deprecated for over half a -/// decade and will go away at some point, hierarchical detection is only implemented for v2. -/// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such -/// systems existed only for a short transition period. 
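Review note: `getCgroupsV2Path` walks from the process's cgroup up toward the mount root and picks the deepest level that exposes both `memory.current` and `memory.stat`, relying on memory settings being inherited. A standalone sketch of that walk (the paths in `main` are illustrative only):

```cpp
#include <filesystem>
#include <iostream>
#include <optional>

namespace fs = std::filesystem;

/// Walk from the process's cgroup toward the mount root and return the
/// deepest level exposing both files we need (settings are inherited).
std::optional<fs::path> findMemoryCgroup(fs::path current, const fs::path & mount)
{
    while (current != mount.parent_path())
    {
        if (fs::exists(current / "memory.current") && fs::exists(current / "memory.stat"))
            return current;
        current = current.parent_path();  /// retry one level up
    }
    return std::nullopt;
}

int main()
{
    if (auto dir = findMemoryCgroup("/sys/fs/cgroup/user.slice/user-1000.slice",
                                    "/sys/fs/cgroup"))
        std::cout << *dir << '\n';
    else
        std::cout << "no suitable cgroup level found\n";
}
```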
- -std::optional getCgroupsV2FileName() -{ - if (!cgroupsV2Enabled()) - return {}; - - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - - String cgroup = cgroupV2OfProcess(); - auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); - - /// Return the bottom-most nested current memory file. If there is no such file at the current - /// level, try again at the parent level as memory settings are inherited. - while (current_cgroup != default_cgroups_mount.parent_path()) - { - auto path = current_cgroup / "memory.current"; - if (std::filesystem::exists(path)) - return {path}; - current_cgroup = current_cgroup.parent_path(); - } - return {}; -} - -std::optional getCgroupsV1FileName() -{ - auto path = default_cgroups_mount / "memory/memory.stat"; - if (!std::filesystem::exists(path)) - return {}; - return {path}; -} - -std::pair getCgroupsFileName() -{ - auto v2_file_name = getCgroupsV2FileName(); - if (v2_file_name.has_value()) - return {*v2_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; - - auto v1_file_name = getCgroupsV1FileName(); - if (v1_file_name.has_value()) - return {*v1_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; - - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); -} - -} - -CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) - : log(log_) -{ - std::tie(file_name, version) = getCgroupsFileName(); - - LOG_INFO(log, "Will read the current memory usage from '{}' (cgroups version: {})", file_name, (version == CgroupsVersion::V1) ? "v1" : "v2"); - - fd = ::open(file_name.data(), O_RDONLY); - if (fd == -1) - ErrnoException::throwFromPath( - (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, - file_name, "Cannot open file '{}'", file_name); -} - -CgroupsMemoryUsageObserver::MemoryUsageFile::~MemoryUsageFile() -{ - assert(fd != -1); - if (::close(fd) != 0) - { - try - { - ErrnoException::throwFromPath( - ErrorCodes::CANNOT_CLOSE_FILE, - file_name, "Cannot close file '{}'", file_name); - } - catch (const ErrnoException &) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - } - } -} - -uint64_t CgroupsMemoryUsageObserver::MemoryUsageFile::readMemoryUsage() const -{ - /// File read is probably not read is thread-safe, just to be sure - std::lock_guard lock(mutex); - - ReadBufferFromFileDescriptor buf(fd); - buf.rewind(); - - uint64_t mem_usage = 0; - - switch (version) - { - case CgroupsVersion::V1: - { - /// Format is - /// kernel 5 - /// rss 15 - /// [...] 
- std::string key; - bool found_rss = false; - - while (!buf.eof()) - { - readStringUntilWhitespace(key, buf); - if (key != "rss") - { - std::string dummy; - readStringUntilNewlineInto(dummy, buf); - buf.ignore(); - continue; - } - - assertChar(' ', buf); - readIntText(mem_usage, buf); - found_rss = true; - break; - } - - if (!found_rss) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find 'rss' in '{}'", file_name); - - break; - } - case CgroupsVersion::V2: - { - readIntText(mem_usage, buf); - break; - } - } - - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(mem_usage)); - - return mem_usage; -} - void CgroupsMemoryUsageObserver::startThread() { if (!thread.joinable()) @@ -301,7 +303,8 @@ void CgroupsMemoryUsageObserver::runThread() std::lock_guard limit_lock(limit_mutex); if (soft_limit > 0 && hard_limit > 0) { - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); if (memory_usage > hard_limit) { if (last_memory_usage <= hard_limit) diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index edc1cee750a..b848a2bff3c 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -3,11 +3,19 @@ #include #include +#include #include namespace DB { +struct ICgroupsReader +{ + virtual ~ICgroupsReader() = default; + + virtual uint64_t readMemoryUsage() = 0; +}; + /// Does two things: /// 1. Periodically reads the memory usage of the process from Linux cgroups. /// You can specify soft or hard memory limits: @@ -61,27 +69,12 @@ private: uint64_t last_memory_usage = 0; /// how much memory does the process use uint64_t last_available_memory_amount; /// how much memory can the process use - /// Represents the cgroup virtual file that shows the memory consumption of the process's cgroup. 
- struct MemoryUsageFile - { - public: - explicit MemoryUsageFile(LoggerPtr log_); - ~MemoryUsageFile(); - uint64_t readMemoryUsage() const; - private: - LoggerPtr log; - mutable std::mutex mutex; - int fd TSA_GUARDED_BY(mutex) = -1; - CgroupsVersion version; - std::string file_name; - }; - - MemoryUsageFile memory_usage_file; - void stopThread(); void runThread(); + std::unique_ptr cgroup_reader; + std::mutex thread_mutex; std::condition_variable cond; ThreadFromGlobalPool thread; diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index 922607da813..16b9488c98d 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include #include #include @@ -200,22 +198,18 @@ public: */ bool finish() { - bool was_finished_before = false; - { std::lock_guard lock(queue_mutex); if (is_finished) return true; - was_finished_before = is_finished; is_finished = true; } pop_condition.notify_all(); push_condition.notify_all(); - - return was_finished_before; + return false; } /// Returns if queue is finished diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index c9832e8efd5..67d6036aa51 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -316,7 +316,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (replace) { - with_element.removeAttribute("replace"); NodePtr new_node = config->importNode(with_node, true); config_root->replaceChild(new_node, config_node); } diff --git a/src/Common/Coverage.cpp b/src/Common/Coverage.cpp new file mode 100644 index 00000000000..a21efe62fb6 --- /dev/null +++ b/src/Common/Coverage.cpp @@ -0,0 +1,65 @@ +#include + +#if defined(SANITIZE_COVERAGE) + +#include +#include + +#include +#include + +#include +#include + +#include + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. +#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + +__attribute__((no_sanitize("coverage"))) +void dumpCoverage() +{ + /// A user can request to dump the coverage information into files at exit. + /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, + /// that cannot introspect it with SQL functions at runtime. + + /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' + /// containing the list of addresses of covered code. + + /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. + + if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) + { + auto dump = [](const std::string & name, auto span) + { + /// Write only non-zeros.
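Review note on the `finish()` simplification: since the early return already handles the already-finished case, the tail can return `false` unconditionally and the separate `was_finished_before` flag goes away; the contract stays "returns whether the queue had already been finished". A reduced model of the new control flow:

```cpp
#include <iostream>
#include <mutex>

/// Reduced model of the simplified finish(): the early return covers the
/// already-finished case, so the tail always reports "we did the transition".
class ToyQueue
{
    std::mutex queue_mutex;
    bool is_finished = false;

public:
    bool finish()
    {
        {
            std::lock_guard lock(queue_mutex);
            if (is_finished)
                return true;   /// someone else finished the queue first
            is_finished = true;
        }
        /// real code notifies waiting producers/consumers here
        return false;          /// we performed the transition ourselves
    }
};

int main()
{
    ToyQueue q;
    std::cout << q.finish() << ' ' << q.finish() << '\n';  /// prints "0 1"
}
```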
+ std::vector data; + data.reserve(span.size()); + for (auto addr : span) + if (addr) + data.push_back(addr); + + int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); + if (-1 == fd) + { + writeError("Cannot open a file to write the coverage data\n"); + } + else + { + if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) + writeError("Cannot write the coverage data to a file\n"); + if (0 != ::close(fd)) + writeError("Cannot close the file with coverage data\n"); + } + }; + + dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); + } +} +#endif + diff --git a/src/Common/Coverage.h b/src/Common/Coverage.h new file mode 100644 index 00000000000..aa6dd2825ed --- /dev/null +++ b/src/Common/Coverage.h @@ -0,0 +1,5 @@ +#pragma once + +#if defined(SANITIZE_COVERAGE) +void dumpCoverage(); +#endif diff --git a/src/Common/EnvironmentChecks.cpp b/src/Common/EnvironmentChecks.cpp new file mode 100644 index 00000000000..d69e8cbaa3d --- /dev/null +++ b/src/Common/EnvironmentChecks.cpp @@ -0,0 +1,234 @@ +#include +#include + +#include + +#include +#include +#include + +#include + +#include + +namespace +{ + +enum class InstructionFail : uint8_t +{ + NONE = 0, + SSE3 = 1, + SSSE3 = 2, + SSE4_1 = 3, + SSE4_2 = 4, + POPCNT = 5, + AVX = 6, + AVX2 = 7, + AVX512 = 8 +}; + +auto instructionFailToString(InstructionFail fail) +{ + switch (fail) + { +#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) + case InstructionFail::NONE: + ret("NONE"); + case InstructionFail::SSE3: + ret("SSE3"); + case InstructionFail::SSSE3: + ret("SSSE3"); + case InstructionFail::SSE4_1: + ret("SSE4.1"); + case InstructionFail::SSE4_2: + ret("SSE4.2"); + case InstructionFail::POPCNT: + ret("POPCNT"); + case InstructionFail::AVX: + ret("AVX"); + case InstructionFail::AVX2: + ret("AVX2"); + case InstructionFail::AVX512: + ret("AVX512"); +#undef ret + } +} + + +sigjmp_buf jmpbuf; + +[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) +{ + siglongjmp(jmpbuf, 1); +} + +/// Check if necessary SSE extensions are available by trying to execute some sse instructions. +/// If instruction is unavailable, SIGILL will be sent by kernel. +void checkRequiredInstructionsImpl(volatile InstructionFail & fail) +{ +#if defined(__SSE3__) + fail = InstructionFail::SSE3; + __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSSE3__) + fail = InstructionFail::SSSE3; + __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + +#endif + +#if defined(__SSE4_1__) + fail = InstructionFail::SSE4_1; + __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSE4_2__) + fail = InstructionFail::SSE4_2; + __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); +#endif + + /// Defined by -msse4.2 +#if defined(__POPCNT__) + fail = InstructionFail::POPCNT; + { + uint64_t a = 0; + uint64_t b = 0; + __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); + } +#endif + +#if defined(__AVX__) + fail = InstructionFail::AVX; + __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX2__) + fail = InstructionFail::AVX2; + __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX512__) + fail = InstructionFail::AVX512; + __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); +#endif + + fail = InstructionFail::NONE; +} + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. 
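Review note: given the dump format described above (a raw array of 64-bit addresses, native byte order, no header), post-processing tools are trivial to write. A minimal reader sketch, a hypothetical helper that is not part of this PR:

```cpp
#include <cstdint>
#include <fstream>
#include <iostream>
#include <vector>

/// Read a coverage dump produced with CLICKHOUSE_WRITE_COVERAGE: a raw
/// array of 64-bit addresses in native byte order, without any header.
int main(int argc, char ** argv)
{
    if (argc != 2)
        return 1;
    std::ifstream in(argv[1], std::ios::binary | std::ios::ate);
    if (!in)
        return 1;
    const auto size = static_cast<size_t>(in.tellg());
    std::vector<uint64_t> addresses(size / sizeof(uint64_t));
    in.seekg(0);
    in.read(reinterpret_cast<char *>(addresses.data()),
            static_cast<std::streamsize>(addresses.size() * sizeof(uint64_t)));
    for (auto addr : addresses)
        std::cout << "0x" << std::hex << addr << '\n';  /// feed to addr2line etc.
}
```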
+#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + +/// Check availability of SSE and other instruction sets. Calls exit on failure. +/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. +void checkRequiredInstructions() +{ + struct sigaction sa{}; + struct sigaction sa_old{}; + sa.sa_sigaction = sigIllCheckHandler; + sa.sa_flags = SA_SIGINFO; + auto signal = SIGILL; + if (sigemptyset(&sa.sa_mask) != 0 + || sigaddset(&sa.sa_mask, signal) != 0 + || sigaction(signal, &sa, &sa_old) != 0) + { + /// You may wonder about strlen. + /// A typical implementation of strlen uses SSE4.2 or AVX2. + /// But that is not the case here, because it is a compiler builtin and is evaluated at compile time. + + writeError("Cannot set signal handler\n"); + _Exit(1); + } + + volatile InstructionFail fail = InstructionFail::NONE; + + if (sigsetjmp(jmpbuf, 1)) + { + writeError("Instruction check failed. The CPU does not support "); + if (!std::apply(writeRetry, instructionFailToString(fail))) + _Exit(1); + writeError(" instruction set.\n"); + _Exit(1); + } + + checkRequiredInstructionsImpl(fail); + + if (sigaction(signal, &sa_old, nullptr)) + { + writeError("Cannot set signal handler\n"); + _Exit(1); + } +} + +struct Checker +{ + Checker() + { + checkRequiredInstructions(); + } +} checker +#ifndef OS_DARWIN + __attribute__((init_priority(101))) /// Run before other static initializers. +#endif +; + +} + + +#if !defined(USE_MUSL) +/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. +void checkHarmfulEnvironmentVariables(char ** argv) +{ + std::initializer_list harmful_env_variables = { + /// The list is a selection from "man ld-linux". + "LD_PRELOAD", + "LD_LIBRARY_PATH", + "LD_ORIGIN_PATH", + "LD_AUDIT", + "LD_DYNAMIC_WEAK", + /// The list is a selection from "man dyld" (osx). + "DYLD_LIBRARY_PATH", + "DYLD_FALLBACK_LIBRARY_PATH", + "DYLD_VERSIONED_LIBRARY_PATH", + "DYLD_INSERT_LIBRARIES", + }; + + bool require_reexec = false; + for (const auto * var : harmful_env_variables) + { + if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) + { + /// NOTE: setenv() is used over unsetenv() since unsetenv() is marked as harmful + if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently + { + fmt::print(stderr, "Cannot override {} environment variable", var); + _exit(1); + } + require_reexec = true; + } + } + + if (require_reexec) + { + /// Use execvp() over execv() to search in PATH. + /// + /// This should be safe, since: + /// - if argv[0] is a relative path - it is OK + /// - if argv[0] has only a basename, then it will be searched for in PATH, like a shell would do. + /// + /// Also note that we search in PATH because there is no easy and + /// portable way to get the absolute path of argv[0]. + /// - on Linux there is /proc/self/exe and AT_EXECFN + /// - but on other OSes there is no such thing (especially on OSX). + /// + /// And since static linking will be done someday anyway, + /// let's not pollute the code base with special cases.
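Review note: the check works by installing a temporary SIGILL handler, probing one instruction, and jumping back via `sigsetjmp`/`siglongjmp` (which also restores the signal mask) if the CPU traps. A self-contained sketch of a single POPCNT probe; it assumes an x86-64 target and is illustrative only:

```cpp
#include <csetjmp>
#include <csignal>
#include <cstdio>

static sigjmp_buf jmpbuf;

[[noreturn]] static void onSigill(int) { siglongjmp(jmpbuf, 1); }

int main()
{
    struct sigaction sa{};
    struct sigaction sa_old{};
    sa.sa_handler = onSigill;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGILL, &sa, &sa_old);

    if (sigsetjmp(jmpbuf, 1))  /// non-zero: we jumped back from the handler
    {
        std::puts("POPCNT not supported");
    }
    else
    {
        unsigned long long a = 0, b = 42;
        __asm__ volatile ("popcnt %1, %0" : "=r"(a) : "r"(b));  /// traps if absent
        std::puts("POPCNT supported");
    }

    sigaction(SIGILL, &sa_old, nullptr);  /// restore the previous handler
}
```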
+ int error = execvp(argv[0], argv); + _exit(error); + } +} +#endif diff --git a/src/Common/EnvironmentChecks.h b/src/Common/EnvironmentChecks.h new file mode 100644 index 00000000000..6d355a69ff9 --- /dev/null +++ b/src/Common/EnvironmentChecks.h @@ -0,0 +1,5 @@ +#pragma once + +#if !defined(USE_MUSL) +void checkHarmfulEnvironmentVariables(char ** argv); +#endif diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index 488f8e2c5dc..48fbd07ec34 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,9 +57,12 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - opts.SampleRate = 50000; + opts.SampleRate = 10000; + + const char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) + if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") + opts.Backtrace = getBackTrace; - opts.Backtrace = getBackTrace; GuardedAlloc.init(opts); return true; diff --git a/src/Common/Jemalloc.cpp b/src/Common/Jemalloc.cpp index fbe2f62c944..d7cc246db6a 100644 --- a/src/Common/Jemalloc.cpp +++ b/src/Common/Jemalloc.cpp @@ -46,6 +46,20 @@ void checkJemallocProfilingEnabled() "set: MALLOC_CONF=background_thread:true,prof:true"); } +template +void setJemallocValue(const char * name, T value) +{ + T old_value; + size_t old_value_size = sizeof(T); + if (mallctl(name, &old_value, &old_value_size, reinterpret_cast(&value), sizeof(T))) + { + LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); + return; + } + + LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value); +} + void setJemallocProfileActive(bool value) { checkJemallocProfilingEnabled(); @@ -58,7 +72,7 @@ void setJemallocProfileActive(bool value) return; } - mallctl("prof.active", nullptr, nullptr, &value, sizeof(bool)); + setJemallocValue("prof.active", value); LOG_TRACE(getLogger("SystemJemalloc"), "Profiling is {}", value ? "enabled" : "disabled"); } @@ -84,6 +98,16 @@ std::string flushJemallocProfile(const std::string & file_prefix) return profile_dump_path; } +void setJemallocBackgroundThreads(bool enabled) +{ + setJemallocValue("background_thread", enabled); +} + +void setJemallocMaxBackgroundThreads(size_t max_threads) +{ + setJemallocValue("max_background_threads", max_threads); +} + } #endif diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h index 80ff0f1a319..499a906fd3d 100644 --- a/src/Common/Jemalloc.h +++ b/src/Common/Jemalloc.h @@ -17,6 +17,10 @@ void setJemallocProfileActive(bool value); std::string flushJemallocProfile(const std::string & file_prefix); +void setJemallocBackgroundThreads(bool enabled); + +void setJemallocMaxBackgroundThreads(size_t max_threads); + } #endif diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index a1058a879bd..439965a92fb 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -238,7 +238,12 @@ \ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ - M(RegexpCreated, "Compiled regular expressions. 
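Review note: the new `setJemallocValue` helper relies on `mallctl` accepting an old-value buffer and a new value in the same call, so the previous value can be logged alongside the update. A standalone sketch against the public jemalloc API; it assumes you build and link against jemalloc, and logging is simplified to printf:

```cpp
#include <cstddef>
#include <cstdio>
#include <jemalloc/jemalloc.h>

/// One mallctl() call both fetches the previous value and installs the new
/// one, which is the pattern the setJemallocValue() helper above uses.
template <typename T>
void setJemallocValue(const char * name, T value)
{
    T old_value;
    size_t old_value_size = sizeof(T);
    if (mallctl(name, &old_value, &old_value_size, &value, sizeof(T)))
        std::fprintf(stderr, "mallctl for %s failed\n", name);
    else
        std::printf("%s updated (previous value fetched in the same call)\n", name);
}

int main()
{
    setJemallocValue<bool>("background_thread", true);
    setJemallocValue<size_t>("max_background_threads", 4);
}
```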
Identical regular expressions compiled just once and cached forever.") \ + M(RegexpWithMultipleNeedlesCreated, "Regular expressions with multiple needles (VectorScan library) compiled.") \ + M(RegexpWithMultipleNeedlesGlobalCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from a local cache.") \ + M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from a local cache.") \ + \ M(ContextLock, "Number of times the lock of Context was acquired or tried to acquire. This is global lock.") \ M(ContextLockWaitMicroseconds, "Context lock wait time in microseconds") \ \ @@ -447,14 +452,18 @@ The server successfully detected this situation and will download merged part fr M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ \ M(AzureGetObject, "Number of Azure API GetObject calls.") \ - M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \ + M(AzureUpload, "Number of Azure blob storage API Upload calls") \ + M(AzureStageBlock, "Number of Azure blob storage API StageBlock calls") \ + M(AzureCommitBlockList, "Number of Azure blob storage API CommitBlockList calls") \ M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \ \ M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ - M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ + M(DiskAzureUpload, "Number of Disk Azure blob storage API Upload calls") \ + M(DiskAzureStageBlock, "Number of Disk Azure blob storage API StageBlock calls") \ + M(DiskAzureCommitBlockList, "Number of Disk Azure blob storage API CommitBlockList calls") \ M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ diff --git a/src/Client/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp similarity index 97% rename from src/Client/QueryFuzzer.cpp rename to src/Common/QueryFuzzer.cpp index f5b700ea529..161c38f20e0 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -68,22 +68,21 @@ Field QueryFuzzer::getRandomField(int type) { case 0: { - return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) - / sizeof(*bad_int64_values))]; + return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)]; } case 1: { static constexpr double values[] = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999, 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20, - FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; + FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)]; } case 2: { static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - bad_int64_values[fuzz_rand() % 
(sizeof(bad_int64_values) / sizeof(*bad_int64_values))], - static_cast(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]) + bad_int64_values[fuzz_rand() % std::size(bad_int64_values)], + static_cast(scales[fuzz_rand() % std::size(scales)]) ); } default: @@ -165,7 +164,8 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,12 +174,14 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -197,7 +199,9 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -206,12 +210,16 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -344,7 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } } @@ -378,7 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1361,11 +1371,15 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - std::cout << std::endl; - WriteBufferFromOStream ast_buf(std::cout, 4096); - formatAST(*ast, ast_buf, false /*highlight*/); - ast_buf.finalize(); - std::cout << std::endl << std::endl; + if (out_stream) + { + *out_stream << std::endl; + + WriteBufferFromOStream ast_buf(*out_stream, 4096); + formatAST(*ast, ast_buf, false /*highlight*/); + ast_buf.finalize(); + *out_stream << std::endl << std::endl; + } } } diff --git a/src/Client/QueryFuzzer.h b/src/Common/QueryFuzzer.h similarity index 91% rename from src/Client/QueryFuzzer.h rename to src/Common/QueryFuzzer.h index 6165e589cae..35d088809f2 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -35,9 +35,31 @@ struct ASTWindowDefinition; * queries, so you want to feed it a lot of queries to get some interesting mix * of them. Normally we feed SQL regression tests to it. */ -struct QueryFuzzer +class QueryFuzzer { - pcg64 fuzz_rand{randomSeed()}; +public: + explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) + : fuzz_rand(fuzz_rand_) + , out_stream(out_stream_) + , debug_stream(debug_stream_) + { + } + + // This is the only function you have to call -- it will modify the passed + // ASTPtr to point to new AST with some random changes. 
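Review note: moving `QueryFuzzer` from `src/Client` to `src/Common` required decoupling it from `std::cout`/`std::cerr`; output now goes through injected `out_stream`/`debug_stream` pointers, with `nullptr` silencing it. A toy illustration of the pattern (a simplified class, not the real fuzzer):

```cpp
#include <iostream>
#include <sstream>

/// Optional output streams injected via the constructor; nullptr means
/// "stay silent" instead of unconditionally writing to std::cout/std::cerr.
class Fuzzer
{
    std::ostream * out_stream = nullptr;
    std::ostream * debug_stream = nullptr;

public:
    explicit Fuzzer(std::ostream * out = nullptr, std::ostream * debug = nullptr)
        : out_stream(out), debug_stream(debug) {}

    void step()
    {
        if (debug_stream)
            *debug_stream << "erased\n";   /// debug chatter only when requested
        if (out_stream)
            *out_stream << "SELECT 1\n";   /// fuzzed query only when requested
    }
};

int main()
{
    Fuzzer silent;          /// e.g. fuzzing inside a server: no output at all
    silent.step();

    std::ostringstream debug;
    Fuzzer verbose(&std::cout, &debug);  /// e.g. an interactive client session
    verbose.step();
    std::cerr << debug.str();
}
```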
+ void fuzzMain(ASTPtr & ast); + + ASTs getInsertQueriesForFuzzedTables(const String & full_query); + ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); + void notifyQueryFailed(ASTPtr ast); + + static bool isSuitableForFuzzing(const ASTCreateQuery & create); + +private: + pcg64 fuzz_rand; + + std::ostream * out_stream = nullptr; + std::ostream * debug_stream = nullptr; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is @@ -66,10 +88,6 @@ struct QueryFuzzer std::unordered_map index_of_fuzzed_table; std::set created_tables_hashes; - // This is the only function you have to call -- it will modify the passed - // ASTPtr to point to new AST with some random changes. - void fuzzMain(ASTPtr & ast); - // Various helper functions follow, normally you shouldn't have to call them. Field getRandomField(int type); Field fuzzField(Field field); @@ -77,9 +95,6 @@ struct QueryFuzzer ASTPtr getRandomExpressionList(); DataTypePtr fuzzDataType(DataTypePtr type); DataTypePtr getRandomType(); - ASTs getInsertQueriesForFuzzedTables(const String & full_query); - ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); - void notifyQueryFailed(ASTPtr ast); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); @@ -102,8 +117,6 @@ struct QueryFuzzer void addTableLike(ASTPtr ast); void addColumnLike(ASTPtr ast); void collectFuzzInfoRecurse(ASTPtr ast); - - static bool isSuitableForFuzzing(const ASTCreateQuery & create); }; } diff --git a/src/Common/config.h.in b/src/Common/config.h.in index ad2ca2652d1..f68701d5d10 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -63,6 +63,7 @@ #cmakedefine01 USE_BCRYPT #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT +#cmakedefine01 USE_PROMETHEUS_PROTOBUFS /// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. /// That's why we use absolute paths. 
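Review note: `#cmakedefine01 USE_PROMETHEUS_PROTOBUFS` makes CMake emit either `#define USE_PROMETHEUS_PROTOBUFS 1` or `#define USE_PROMETHEUS_PROTOBUFS 0` (rather than leaving the macro undefined), so call sites can use plain `#if`. A minimal sketch; the fallback define below only stands in for the generated header so the snippet compiles standalone:

```cpp
#include <iostream>

/// In the real build this macro comes from the header generated out of
/// src/Common/config.h.in; defaulted here so the sketch is self-contained.
#ifndef USE_PROMETHEUS_PROTOBUFS
#define USE_PROMETHEUS_PROTOBUFS 0
#endif

int main()
{
#if USE_PROMETHEUS_PROTOBUFS
    std::cout << "built with Prometheus protobufs\n";
#else
    std::cout << "built without Prometheus protobufs\n";
#endif
}
```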
diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index bb83e0381ef..abeda95ed0d 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 68e0131c91b..2e7aa0d086f 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -185,7 +185,6 @@ void registerCodecDeflateQpl(CompressionCodecFactory & factory); /// Keeper use only general-purpose codecs, so we don't need these special codecs /// in standalone build -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerCodecDelta(CompressionCodecFactory & factory); void registerCodecT64(CompressionCodecFactory & factory); void registerCodecDoubleDelta(CompressionCodecFactory & factory); @@ -193,7 +192,6 @@ void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecEncrypted(CompressionCodecFactory & factory); void registerCodecFPC(CompressionCodecFactory & factory); void registerCodecGCD(CompressionCodecFactory & factory); -#endif CompressionCodecFactory::CompressionCodecFactory() { @@ -205,7 +203,6 @@ CompressionCodecFactory::CompressionCodecFactory() #endif registerCodecLZ4HC(*this); registerCodecMultiple(*this); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerCodecDelta(*this); registerCodecT64(*this); registerCodecDoubleDelta(*this); @@ -216,7 +213,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecDeflateQpl(*this); #endif registerCodecGCD(*this); -#endif default_codec = get("LZ4", {}); } diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 2e8dbe75e90..c9b45d9a344 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -5,6 +5,7 @@ #include #include +#include #include #include diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 82b30a0b5f6..2a53bade62f 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -2,6 +2,7 @@ #include "config.h" +#include #include #include #include diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index ff26b3171ea..b4241235cc7 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -150,12 +150,18 @@ M(S3PutObject) \ M(S3GetObject) \ \ - M(AzureUploadPart) \ - M(DiskAzureUploadPart) \ + M(AzureUpload) \ + M(DiskAzureUpload) \ + M(AzureStageBlock) \ + M(DiskAzureStageBlock) \ + M(AzureCommitBlockList) \ + M(DiskAzureCommitBlockList) \ M(AzureCopyObject) \ M(DiskAzureCopyObject) \ M(AzureDeleteObjects) \ + M(DiskAzureDeleteObjects) \ M(AzureListObjects) \ + M(DiskAzureListObjects) \ \ M(DiskS3DeleteObjects) \ M(DiskS3CopyObject) \ diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 925ac9a4efe..f36b1ef151f 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -334,19 +334,13 @@ void KeeperDispatcher::snapshotThread() { setThreadName("KeeperSnpT"); const auto & shutdown_called = keeper_context->isShutdownCalled(); - while (!shutdown_called) + CreateSnapshotTask task; + while (snapshots_queue.pop(task)) { - CreateSnapshotTask task; - if (!snapshots_queue.pop(task)) - break; - try { auto snapshot_file_info = task.create_snapshot(std::move(task.snapshot), /*execute_only_cleanup=*/shutdown_called); - if (shutdown_called) - break; - if 
(!snapshot_file_info) continue; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 88f708ab4ae..e7cae714ba6 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -597,7 +597,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup) { nuraft::ptr exception(nullptr); - bool ret = true; + bool ret = false; if (!execute_only_cleanup) { try @@ -627,7 +627,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res else { auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot); - auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); + auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk( + *snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); latest_snapshot_info = std::move(snapshot_info); latest_snapshot_buf = std::move(snapshot_buf); } @@ -640,13 +641,14 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res latest_snapshot_info->path); } } + + ret = true; } catch (...) { ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreationsFailed); LOG_TRACE(log, "Exception happened during snapshot"); tryLogCurrentException(log); - ret = false; } } { diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp deleted file mode 100644 index 2017adcc58d..00000000000 --- a/src/Coordination/Standalone/Context.cpp +++ /dev/null @@ -1,486 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include - -namespace ProfileEvents -{ - extern const Event ContextLock; - extern const Event ContextLockWaitMicroseconds; -} - -namespace CurrentMetrics -{ - extern const Metric ContextLockWait; - extern const Metric BackgroundSchedulePoolTask; - extern const Metric BackgroundSchedulePoolSize; - extern const Metric IOWriterThreads; - extern const Metric IOWriterThreadsActive; - extern const Metric IOWriterThreadsScheduled; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int UNSUPPORTED_METHOD; -} - -struct ContextSharedPart : boost::noncopyable -{ - ContextSharedPart() - : macros(std::make_unique()) - {} - - ~ContextSharedPart() - { - if (keeper_dispatcher) - { - try - { - keeper_dispatcher->shutdown(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - /// Wait for thread pool for background reads and writes, - /// since it may use per-user MemoryTracker which will be destroyed here. - if (asynchronous_remote_fs_reader) - { - try - { - asynchronous_remote_fs_reader->wait(); - asynchronous_remote_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (asynchronous_local_fs_reader) - { - try - { - asynchronous_local_fs_reader->wait(); - asynchronous_local_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (synchronous_local_fs_reader) - { - try - { - synchronous_local_fs_reader->wait(); - synchronous_local_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (threadpool_writer) - { - try - { - threadpool_writer->wait(); - threadpool_writer.reset(); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - } - - /// For access of most of shared objects. - mutable SharedMutex mutex; - - ServerSettings server_settings; - - String path; /// Path to the data directory, with a slash at the end. - ConfigurationPtr config; /// Global configuration settings. - MultiVersion macros; /// Substitutions extracted from config. - OnceFlag schedule_pool_initialized; - mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background - RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml - - mutable OnceFlag readers_initialized; - mutable std::unique_ptr asynchronous_remote_fs_reader; - mutable std::unique_ptr asynchronous_local_fs_reader; - mutable std::unique_ptr synchronous_local_fs_reader; - -#if USE_LIBURING - mutable OnceFlag io_uring_reader_initialized; - mutable std::unique_ptr io_uring_reader; -#endif - - mutable OnceFlag threadpool_writer_initialized; - mutable std::unique_ptr threadpool_writer; - - mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads - mutable ThrottlerPtr remote_write_throttler; /// A server-wide throttler for remote IO writes - - mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads - mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes - - std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage - - mutable std::mutex keeper_dispatcher_mutex; - mutable std::shared_ptr keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex); -}; - -ContextData::ContextData() = default; -ContextData::ContextData(const ContextData &) = default; - -Context::Context() = default; -Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this(rhs) {} -Context::~Context() = default; - -SharedContextHolder::SharedContextHolder(SharedContextHolder &&) noexcept = default; -SharedContextHolder & SharedContextHolder::operator=(SharedContextHolder &&) noexcept = default; -SharedContextHolder::SharedContextHolder() = default; -SharedContextHolder::~SharedContextHolder() = default; -SharedContextHolder::SharedContextHolder(std::unique_ptr shared_context) - : shared(std::move(shared_context)) {} - -void SharedContextHolder::reset() { shared.reset(); } - -void Context::makeGlobalContext() -{ - initGlobal(); - global_context = shared_from_this(); -} - -ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) -{ - auto res = std::shared_ptr(new Context); - res->shared = shared_part; - return res; -} - -void Context::initGlobal() -{ - assert(!global_context_instance); - global_context_instance = shared_from_this(); -} - -SharedContextHolder Context::createShared() -{ - return SharedContextHolder(std::make_unique()); -} - - -ContextMutablePtr Context::getGlobalContext() const -{ - auto ptr = global_context.lock(); - if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired"); - return ptr; -} - -std::unique_lock Context::getGlobalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getGlobalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - 
CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::unique_lock Context::getLocalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getLocalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -String Context::getPath() const -{ - auto lock = getGlobalSharedLock(); - return shared->path; -} - -void Context::setPath(const String & path) -{ - auto lock = getGlobalLock(); - shared->path = path; -} - -MultiVersion::Version Context::getMacros() const -{ - return shared->macros.get(); -} - -void Context::setMacros(std::unique_ptr && macros) -{ - shared->macros.set(std::move(macros)); -} - -BackgroundSchedulePool & Context::getSchedulePool() const -{ - callOnce(shared->schedule_pool_initialized, [&] { - shared->schedule_pool = std::make_unique( - shared->server_settings.background_schedule_pool_size, - CurrentMetrics::BackgroundSchedulePoolTask, - CurrentMetrics::BackgroundSchedulePoolSize, - "BgSchPool"); - }); - - return *shared->schedule_pool; -} - -void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config) -{ - shared->remote_host_filter.setValuesFromConfig(config); -} - -const RemoteHostFilter & Context::getRemoteHostFilter() const -{ - return shared->remote_host_filter; -} - -IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const -{ - callOnce(shared->readers_initialized, [&] { - const auto & config = getConfigRef(); - shared->asynchronous_remote_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER, config); - shared->asynchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER, config); - shared->synchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER, config); - }); - - switch (type) - { - case FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER: - return *shared->asynchronous_remote_fs_reader; - case FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER: - return *shared->asynchronous_local_fs_reader; - case FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER: - return *shared->synchronous_local_fs_reader; - } -} - -#if USE_LIBURING -IOUringReader & Context::getIOUringReader() const -{ - callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = createIOUringReader(); - }); - - return *shared->io_uring_reader; -} -#endif - -std::shared_ptr Context::getFilesystemCacheLog() const -{ - return nullptr; -} - -std::shared_ptr Context::getFilesystemReadPrefetchesLog() const -{ - return nullptr; -} - -std::shared_ptr Context::getBlobStorageLog() const -{ - return nullptr; -} - -void Context::setConfig(const ConfigurationPtr & config) -{ - auto lock = getGlobalLock(); - shared->config = config; -} - -const 
Poco::Util::AbstractConfiguration & Context::getConfigRef() const -{ - auto lock = getGlobalSharedLock(); - return shared->config ? *shared->config : Poco::Util::Application::instance().config(); -} - -std::shared_ptr Context::getAsyncReadCounters() const -{ - auto lock = getLocalLock(); - if (!async_read_counters) - async_read_counters = std::make_shared(); - return async_read_counters; -} - -ThreadPool & Context::getThreadPoolWriter() const -{ - callOnce(shared->threadpool_writer_initialized, [&] { - const auto & config = getConfigRef(); - auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100); - auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000); - - shared->threadpool_writer = std::make_unique( - CurrentMetrics::IOWriterThreads, CurrentMetrics::IOWriterThreadsActive, CurrentMetrics::IOWriterThreadsScheduled, pool_size, pool_size, queue_size); - }); - - return *shared->threadpool_writer; -} - -ThrottlerPtr Context::getRemoteReadThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getRemoteWriteThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getLocalReadThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getLocalWriteThrottler() const -{ - return nullptr; -} - -ReadSettings Context::getReadSettings() const -{ - return ReadSettings{}; -} - -ResourceManagerPtr Context::getResourceManager() const -{ - return nullptr; -} - -ClassifierPtr Context::getWorkloadClassifier() const -{ - return nullptr; -} - -void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const -{ - const auto & config_ref = getConfigRef(); - - std::lock_guard lock(shared->keeper_dispatcher_mutex); - - if (shared->keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times"); - - if (config_ref.has("keeper_server")) - { - shared->keeper_dispatcher = std::make_shared(); - shared->keeper_dispatcher->initialize(config_ref, true, start_async, getMacros()); - } -} - -std::shared_ptr Context::getKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (!shared->keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Keeper must be initialized before requests"); - - return shared->keeper_dispatcher; -} - -std::shared_ptr Context::tryGetKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - return shared->keeper_dispatcher; -} - -void Context::shutdownKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (shared->keeper_dispatcher) - { - shared->keeper_dispatcher->shutdown(); - shared->keeper_dispatcher.reset(); - } -} - -void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::AbstractConfiguration & config_) -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (!shared->keeper_dispatcher) - return; - - shared->keeper_dispatcher->updateConfiguration(config_, getMacros()); -} - -std::shared_ptr Context::getZooKeeper() const -{ - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); -} - -const S3SettingsByEndpoint & Context::getStorageS3Settings() const -{ - std::lock_guard lock(shared->mutex); - - if (!shared->storage_s3_settings) - { - const auto & config = shared->config ? 
*shared->config : Poco::Util::Application::instance().config(); - shared->storage_s3_settings.emplace().loadFromConfig(config, "s3", getSettingsRef()); - } - - return *shared->storage_s3_settings; -} - -const ServerSettings & Context::getServerSettings() const -{ - return shared->server_settings; -} - -bool Context::hasTraceCollector() const -{ - return false; -} - -bool Context::isBackgroundOperationContext() const -{ - return false; -} - -} diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h deleted file mode 100644 index d3bbfececed..00000000000 --- a/src/Coordination/Standalone/Context.h +++ /dev/null @@ -1,178 +0,0 @@ -#pragma once - -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include - -#include - -#include "config.h" -namespace zkutil -{ - class ZooKeeper; - using ZooKeeperPtr = std::shared_ptr; -} - -namespace DB -{ - -struct ContextSharedPart; -class Macros; -class FilesystemCacheLog; -class FilesystemReadPrefetchesLog; -class BlobStorageLog; -class IOUringReader; -class S3SettingsByEndpoint; - -/// A small class which owns ContextShared. -/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete. -struct SharedContextHolder -{ - ~SharedContextHolder(); - SharedContextHolder(); - explicit SharedContextHolder(std::unique_ptr shared_context); - SharedContextHolder(SharedContextHolder &&) noexcept; - - SharedContextHolder & operator=(SharedContextHolder &&) noexcept; - - ContextSharedPart * get() const { return shared.get(); } - void reset(); -private: - std::unique_ptr shared; -}; - -class ContextData -{ -protected: - ContextWeakMutablePtr global_context; - inline static ContextPtr global_context_instance; - ContextSharedPart * shared; - - /// Query metrics for reading data asynchronously with IAsynchronousReader. - mutable std::shared_ptr async_read_counters; - - Settings settings; /// Setting for query execution. - -public: - /// Use copy constructor or createGlobal() instead - ContextData(); - ContextData(const ContextData &); -}; - -class Context : public ContextData, public std::enable_shared_from_this -{ -private: - /// ContextData mutex - mutable SharedMutex mutex; - - Context(); - Context(const Context &); - - std::unique_lock getGlobalLock() const; - - std::shared_lock getGlobalSharedLock() const; - - std::unique_lock getLocalLock() const; - - std::shared_lock getLocalSharedLock() const; - -public: - /// Create initial Context with ContextShared and etc. - static ContextMutablePtr createGlobal(ContextSharedPart * shared_part); - static SharedContextHolder createShared(); - - ContextMutablePtr getGlobalContext() const; - static ContextPtr getGlobalContextInstance() { return global_context_instance; } - - void makeGlobalContext(); - void initGlobal(); - - ~Context(); - - using ConfigurationPtr = Poco::AutoPtr; - - /// Global application configuration settings. 
- void setConfig(const ConfigurationPtr & config); - const Poco::Util::AbstractConfiguration & getConfigRef() const; - - const Settings & getSettingsRef() const { return settings; } - - String getPath() const; - void setPath(const String & path); - - MultiVersion::Version getMacros() const; - void setMacros(std::unique_ptr && macros); - - BackgroundSchedulePool & getSchedulePool() const; - - /// Storage of allowed hosts from config.xml - void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); - const RemoteHostFilter & getRemoteHostFilter() const; - - std::shared_ptr getFilesystemCacheLog() const; - std::shared_ptr getFilesystemReadPrefetchesLog() const; - std::shared_ptr getBlobStorageLog() const; - - enum class ApplicationType : uint8_t - { - KEEPER, - SERVER, - }; - - void setApplicationType(ApplicationType) {} - ApplicationType getApplicationType() const { return ApplicationType::KEEPER; } - - IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; -#if USE_LIBURING - IOUringReader & getIOUringReader() const; -#endif - std::shared_ptr getAsyncReadCounters() const; - ThreadPool & getThreadPoolWriter() const; - - ThrottlerPtr getRemoteReadThrottler() const; - ThrottlerPtr getRemoteWriteThrottler() const; - - ThrottlerPtr getLocalReadThrottler() const; - ThrottlerPtr getLocalWriteThrottler() const; - - ReadSettings getReadSettings() const; - - /// Resource management related - ResourceManagerPtr getResourceManager() const; - ClassifierPtr getWorkloadClassifier() const; - - std::shared_ptr getKeeperDispatcher() const; - std::shared_ptr tryGetKeeperDispatcher() const; - void initializeKeeperDispatcher(bool start_async) const; - void shutdownKeeperDispatcher() const; - void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); - - zkutil::ZooKeeperPtr getZooKeeper() const; - - const S3SettingsByEndpoint & getStorageS3Settings() const; - - const String & getUserName() const { static std::string user; return user; } - - const ServerSettings & getServerSettings() const; - - bool hasTraceCollector() const; - - bool isBackgroundOperationContext() const; -}; - -} diff --git a/src/Coordination/Standalone/Settings.cpp b/src/Coordination/Standalone/Settings.cpp deleted file mode 100644 index 12a7a42ffac..00000000000 --- a/src/Coordination/Standalone/Settings.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include - -namespace DB -{ - -IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS) - -std::vector Settings::getAllRegisteredNames() const -{ - std::vector all_settings; - for (const auto & setting_field : all()) - { - all_settings.push_back(setting_field.getName()); - } - return all_settings; -} - -void Settings::set(std::string_view name, const Field & value) -{ - BaseSettings::set(name, value); -} - - -} diff --git a/src/Coordination/Standalone/ThreadStatusExt.cpp b/src/Coordination/Standalone/ThreadStatusExt.cpp deleted file mode 100644 index fc78233d9dc..00000000000 --- a/src/Coordination/Standalone/ThreadStatusExt.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include - -namespace DB -{ - -void CurrentThread::detachFromGroupIfNotDetached() -{ -} - -void CurrentThread::attachToGroup(const ThreadGroupPtr &) -{ -} - -void ThreadStatus::initGlobalProfiler(UInt64 /*global_profiler_real_time_period*/, UInt64 /*global_profiler_cpu_time_period*/) -{ -} - -} diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 68ac45fa24f..d473810bcb8 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -153,7 +153,7 @@ 
namespace DB M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ - M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b2b71677cb1..ce6fabc092a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -125,6 +125,9 @@ class IColumn; M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \ M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ + M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff between retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff between retries in azure sdk", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. 
While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ @@ -400,7 +403,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather than `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enabled, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ @@ -613,6 +616,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutations_sync', but controls only execution of lightweight deletes", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When a lightweight delete happens on a table with projection(s), the possible operations are either to throw an exception because projections exist, or to drop all projections of this table and then do the lightweight delete.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that are semantically equal to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ @@ -624,7 +628,7 @@ class IColumn; M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains with multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. 
Disabled by default because it could make an inconsistent change in a distributed query that would lead to its failure.", 0) \ - M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ + M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \ @@ -733,6 +737,7 @@ class IColumn; M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ + M(Bool, database_replicated_allow_heavy_create, false, "Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine. Note that it can block the DDL queue for a long time.", 0) \ M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \ M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 723daa6ad22..28ffc18f6dc 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,8 +59,14 @@ static std::initializer_list handleCrash(); -#endif /// Send crash report to developers (if configured) if (sig != SanitizerTrap) @@ -533,8 +531,6 @@ private: } } - /// ClickHouse Keeper does not link to some parts of Settings. -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// List changed settings. if (!query_id.empty()) { @@ -549,7 +545,6 @@ private: LOG_FATAL(log, "Changed settings: {}", changed_settings); } } -#endif /// When everything is done, we will try to send these error messages to the client. 
if (thread_ptr) diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index 9479dd65730..c51a1100639 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -19,7 +19,7 @@ #include "config.h" #include -#if USE_SENTRY && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_SENTRY # include # include diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index a4c8ed1a241..e96937d522d 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; - extern const int ILLEGAL_INDEX; + extern const int ARGUMENT_OUT_OF_BOUND; extern const int LOGICAL_ERROR; } @@ -286,7 +286,7 @@ std::optional DataTypeTuple::tryGetPositionByName(const String & name) c String DataTypeTuple::getNameByPosition(size_t i) const { if (i == 0 || i > names.size()) - throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index of tuple element ({}) if out range ([1, {}])", i, names.size()); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index of tuple element ({}) is out of range ([1, {}])", i, names.size()); return names[i - 1]; } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index c77709c27eb..bb9761a3905 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -78,7 +78,6 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c std::unique_ptr buf; -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD if (with_file_cache) { auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); @@ -96,7 +95,6 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c /* read_until_position */std::nullopt, cache_log); } -#endif /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the /// former doesn't support seeks. 
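
For context on the next file: the WriteBufferFromAzureBlobStorage changes below instrument two distinct upload paths. A buffer that produced exactly one part no larger than max_single_part_upload_size is sent with a single Upload call (counted by the new AzureUpload event), while everything else stages each part with StageBlock and finishes with one CommitBlockList (counted by AzureStageBlock and AzureCommitBlockList). A minimal standalone sketch of that dispatch, using hypothetical names and plain standard C++ rather than the real ClickHouse/Azure SDK types:

#include <cstddef>
#include <iostream>
#include <vector>

// Simplified stand-in for the buffer's part bookkeeping: a part is just its size in bytes.
enum class UploadPath { SinglePart, BlockList };

static UploadPath chooseUploadPath(const std::vector<size_t> & part_sizes, size_t max_single_part_upload_size)
{
    // Exactly one part that fits under the limit -> one Upload request, no block list.
    if (part_sizes.size() == 1 && part_sizes.front() <= max_single_part_upload_size)
        return UploadPath::SinglePart;
    // Otherwise each part becomes a staged block and the block list is committed once at the end.
    return UploadPath::BlockList;
}

int main()
{
    constexpr size_t limit = 100 * 1024 * 1024; // same 100 MiB default as RequestSettings later in this patch
    std::cout << (chooseUploadPath({size_t{16} << 20}, limit) == UploadPath::SinglePart) << '\n';                   // prints 1
    std::cout << (chooseUploadPath({size_t{64} << 20, size_t{64} << 20}, limit) == UploadPath::BlockList) << '\n'; // prints 1
}

The single-part path saves a round of StageBlock requests plus the final commit for small blobs, which is presumably why the patch counts it with a separate profile event.
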
diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index a2d21cf49c2..60fa2997c50 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -14,6 +14,15 @@ namespace ProfileEvents { extern const Event RemoteWriteThrottlerBytes; extern const Event RemoteWriteThrottlerSleepMicroseconds; + + extern const Event AzureUpload; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; + + extern const Event DiskAzureUpload; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; + } namespace DB @@ -30,7 +39,7 @@ struct WriteBufferFromAzureBlobStorage::PartData size_t data_size = 0; }; -BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureObjectStorageSettings & settings) +BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureBlobStorage::RequestSettings & settings) { BufferAllocationPolicy::Settings allocation_settings; allocation_settings.strict_size = settings.strict_upload_part_size; @@ -48,7 +57,7 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , log(getLogger("WriteBufferFromAzureBlobStorage")) @@ -134,6 +143,10 @@ void WriteBufferFromAzureBlobStorage::preFinalize() /// then we use single part upload instead of multi part upload if (block_ids.empty() && detached_part_data.size() == 1 && detached_part_data.front().data_size <= max_single_part_upload_size) { + ProfileEvents::increment(ProfileEvents::AzureUpload); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureUpload); + auto part_data = std::move(detached_part_data.front()); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(part_data.memory.data()), part_data.data_size); @@ -164,6 +177,10 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() if (!block_ids.empty()) { auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); } @@ -269,6 +286,10 @@ void WriteBufferFromAzureBlobStorage::writePart(WriteBufferFromAzureBlobStorage: auto & data_block_id = std::get<0>(*worker_data); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); + Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(std::get<1>(*worker_data).memory.data()), data_size); execWithRetry([&](){ block_blob_client.StageBlock(data_block_id, memory_stream); }, max_unexpected_write_error_retries, data_size); }; diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 
10fe871a727..3ee497c4e44 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -35,7 +35,7 @@ public: const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); ~WriteBufferFromAzureBlobStorage() override; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp deleted file mode 100644 index 1a5388349f8..00000000000 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ /dev/null @@ -1,272 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace Azure::Storage::Blobs; - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - - -void validateStorageAccountUrl(const String & storage_account_url) -{ - const auto * storage_account_url_pattern_str = R"(http(()|s)://[a-z0-9-.:]+(()|/)[a-z0-9]*(()|/))"; - static const RE2 storage_account_url_pattern(storage_account_url_pattern_str); - - if (!re2::RE2::FullMatch(storage_account_url, storage_account_url_pattern)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Blob Storage URL is not valid, should follow the format: {}, got: {}", storage_account_url_pattern_str, storage_account_url); -} - - -void validateContainerName(const String & container_name) -{ - auto len = container_name.length(); - if (len < 3 || len > 64) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "AzureBlob Storage container name is not valid, should have length between 3 and 64, but has length: {}", len); - - const auto * container_name_pattern_str = R"([a-z][a-z0-9-]+)"; - static const RE2 container_name_pattern(container_name_pattern_str); - - if (!re2::RE2::FullMatch(container_name, container_name_pattern)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "AzureBlob Storage container name is not valid, should follow the format: {}, got: {}", - container_name_pattern_str, container_name); -} - - -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - String storage_url; - String account_name; - String container_name; - String prefix; - if (config.has(config_prefix + ".endpoint")) - { - String endpoint = config.getString(config_prefix + ".endpoint"); - - /// For some authentication methods account name is not present in the endpoint - /// 'endpoint_contains_account_name' bool is used to understand how to split the endpoint (default : true) - bool endpoint_contains_account_name = config.getBool(config_prefix + ".endpoint_contains_account_name", true); - - size_t pos = endpoint.find("//"); - if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected '//' in endpoint"); - - if (endpoint_contains_account_name) - { - size_t acc_pos_begin = endpoint.find('/', pos+2); - if (acc_pos_begin == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); - - storage_url = endpoint.substr(0,acc_pos_begin); - size_t acc_pos_end = endpoint.find('/',acc_pos_begin+1); - - if (acc_pos_end == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - - account_name = endpoint.substr(acc_pos_begin+1,(acc_pos_end-acc_pos_begin)-1); - - size_t cont_pos_end = endpoint.find('/', 
acc_pos_end+1); - - if (cont_pos_end != std::string::npos) - { - container_name = endpoint.substr(acc_pos_end+1,(cont_pos_end-acc_pos_end)-1); - prefix = endpoint.substr(cont_pos_end+1); - } - else - { - container_name = endpoint.substr(acc_pos_end+1); - } - } - else - { - size_t cont_pos_begin = endpoint.find('/', pos+2); - - if (cont_pos_begin == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - - storage_url = endpoint.substr(0,cont_pos_begin); - size_t cont_pos_end = endpoint.find('/',cont_pos_begin+1); - - if (cont_pos_end != std::string::npos) - { - container_name = endpoint.substr(cont_pos_begin+1,(cont_pos_end-cont_pos_begin)-1); - prefix = endpoint.substr(cont_pos_end+1); - } - else - { - container_name = endpoint.substr(cont_pos_begin+1); - } - } - } - else if (config.has(config_prefix + ".connection_string")) - { - storage_url = config.getString(config_prefix + ".connection_string"); - container_name = config.getString(config_prefix + ".container_name"); - } - else if (config.has(config_prefix + ".storage_account_url")) - { - storage_url = config.getString(config_prefix + ".storage_account_url"); - validateStorageAccountUrl(storage_url); - container_name = config.getString(config_prefix + ".container_name"); - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); - - if (!container_name.empty()) - validateContainerName(container_name); - std::optional container_already_exists {}; - if (config.has(config_prefix + ".container_already_exists")) - container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; - return {storage_url, account_name, container_name, prefix, container_already_exists}; -} - - -template -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) = delete; - -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & /*container_name*/, const BlobClientOptions & client_options) -{ - return std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_str, client_options)); -} - -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) -{ - return std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_str, container_name, client_options)); -} - -template -std::unique_ptr getAzureBlobStorageClientWithAuth( - const String & url, - const String & container_name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - const Azure::Storage::Blobs::BlobClientOptions & client_options) -{ - std::string connection_str; - if (config.has(config_prefix + ".connection_string")) - connection_str = config.getString(config_prefix + ".connection_string"); - - if (!connection_str.empty()) - return getClientWithConnectionString(connection_str, container_name, client_options); - - if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) - { - auto storage_shared_key_credential = std::make_shared( - config.getString(config_prefix + ".account_name"), - config.getString(config_prefix + ".account_key") - ); - return std::make_unique(url, storage_shared_key_credential, client_options); - } - - if (config.getBool(config_prefix + ".use_workload_identity", false)) - { - auto 
workload_identity_credential = std::make_shared(); - return std::make_unique(url, workload_identity_credential, client_options); - } - - auto managed_identity_credential = std::make_shared(); - return std::make_unique(url, managed_identity_credential, client_options); -} - -Azure::Storage::Blobs::BlobClientOptions getAzureBlobClientOptions(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - Azure::Core::Http::Policies::RetryOptions retry_options; - retry_options.MaxRetries = config.getUInt(config_prefix + ".max_tries", 10); - retry_options.RetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10)); - retry_options.MaxRetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000)); - - using CurlOptions = Azure::Core::Http::CurlTransportOptions; - CurlOptions curl_options; - curl_options.NoSignal = true; - - if (config.has(config_prefix + ".curl_ip_resolve")) - { - auto value = config.getString(config_prefix + ".curl_ip_resolve"); - if (value == "ipv4") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V4; - else if (value == "ipv6") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V6; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. Expected one of 'ipv4' or 'ipv6'", value); - } - - Azure::Storage::Blobs::BlobClientOptions client_options; - client_options.Retry = retry_options; - client_options.Transport.Transport = std::make_shared(curl_options); - - client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}; - - return client_options; -} - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - auto endpoint = processAzureBlobStorageEndpoint(config, config_prefix); - auto container_name = endpoint.container_name; - auto final_url = endpoint.getEndpoint(); - auto client_options = getAzureBlobClientOptions(config, config_prefix); - - if (endpoint.container_already_exists.value_or(false)) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); - - auto blob_service_client = getAzureBlobStorageClientWithAuth(endpoint.getEndpointWithoutContainer(), container_name, config, config_prefix, client_options); - - try - { - return std::make_unique(blob_service_client->CreateBlobContainer(container_name).Value); - } - catch (const Azure::Storage::StorageException & e) - { - /// If container_already_exists is not set (in config), ignore already exists error. 
- /// (Conflict - The specified container already exists) - if (!endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); - throw; - } -} - -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) -{ - std::unique_ptr settings = std::make_unique(); - settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", context->getSettings().azure_max_single_part_upload_size); - settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); - settings->max_single_read_retries = config.getInt(config_prefix + ".max_single_read_retries", 3); - settings->max_single_download_retries = config.getInt(config_prefix + ".max_single_download_retries", 3); - settings->list_object_keys_size = config.getInt(config_prefix + ".list_object_keys_size", 1000); - settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", context->getSettings().azure_min_upload_part_size); - settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", context->getSettings().azure_max_upload_part_size); - settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size); - settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); - settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", 50000); - settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries); - settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", context->getSettings().azure_max_inflight_parts_for_one_file); - settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); - settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", context->getSettings().azure_upload_part_size_multiply_factor); - settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", context->getSettings().azure_upload_part_size_multiply_parts_count_threshold); - - return settings; -} - -} - -#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h deleted file mode 100644 index e4775a053c1..00000000000 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - -namespace DB -{ - -struct AzureBlobStorageEndpoint -{ - const String storage_account_url; - const String account_name; - const String container_name; - const String prefix; - const std::optional container_already_exists; - - String getEndpoint() - { - String url = storage_account_url; - if (url.ends_with('/')) - url.pop_back(); - - if (!account_name.empty()) - url += "/" + account_name; - - if (!container_name.empty()) - url += "/" + container_name; - - 
if (!prefix.empty()) - url += "/" + prefix; - - return url; - } - - String getEndpointWithoutContainer() - { - String url = storage_account_url; - - if (!account_name.empty()) - url += "/" + account_name; - - return url; - } -}; - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - -} - -#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp new file mode 100644 index 00000000000..d9dfedadd48 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -0,0 +1,351 @@ +#include + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace AzureBlobStorage +{ + +static void validateStorageAccountUrl(const String & storage_account_url) +{ + const auto * storage_account_url_pattern_str = R"(http(()|s)://[a-z0-9-.:]+(()|/)[a-z0-9]*(()|/))"; + static const RE2 storage_account_url_pattern(storage_account_url_pattern_str); + + if (!re2::RE2::FullMatch(storage_account_url, storage_account_url_pattern)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Blob Storage URL is not valid, should follow the format: {}, got: {}", storage_account_url_pattern_str, storage_account_url); +} + +static void validateContainerName(const String & container_name) +{ + auto len = container_name.length(); + if (len < 3 || len > 64) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "AzureBlob Storage container name is not valid, should have length between 3 and 64, but has length: {}", len); + + const auto * container_name_pattern_str = R"([a-z][a-z0-9-]+)"; + static const RE2 container_name_pattern(container_name_pattern_str); + + if (!re2::RE2::FullMatch(container_name, container_name_pattern)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "AzureBlob Storage container name is not valid, should follow the format: {}, got: {}", + container_name_pattern_str, container_name); +} + +static bool isConnectionString(const std::string & candidate) +{ + return !candidate.starts_with("http"); +} + +String ConnectionParams::getConnectionURL() const +{ + if (std::holds_alternative(auth_method)) + { + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(endpoint.storage_account_url); + return parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl(); + } + + return endpoint.storage_account_url; +} + +std::unique_ptr ConnectionParams::createForService() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ServiceClient::CreateFromConnectionString(auth.toUnderType(), client_options)); + else + return std::make_unique(endpoint.getEndpointWithoutContainer(), auth, client_options); + }, auth_method); +} + +std::unique_ptr ConnectionParams::createForContainer() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ContainerClient::CreateFromConnectionString(auth.toUnderType(), endpoint.container_name, client_options)); + else + return 
std::make_unique(endpoint.getEndpoint(), auth, client_options); + }, auth_method); +} + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + String storage_url; + String account_name; + String container_name; + String prefix; + + auto get_container_name = [&] + { + if (config.has(config_prefix + ".container_name")) + return config.getString(config_prefix + ".container_name"); + + if (config.has(config_prefix + ".container")) + return config.getString(config_prefix + ".container"); + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `container` or `container_name` parameter in config"); + }; + + if (config.has(config_prefix + ".endpoint")) + { + String endpoint = config.getString(config_prefix + ".endpoint"); + + /// For some authentication methods account name is not present in the endpoint + /// 'endpoint_contains_account_name' bool is used to understand how to split the endpoint (default : true) + bool endpoint_contains_account_name = config.getBool(config_prefix + ".endpoint_contains_account_name", true); + + size_t pos = endpoint.find("//"); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected '//' in endpoint"); + + if (endpoint_contains_account_name) + { + size_t acc_pos_begin = endpoint.find('/', pos + 2); + if (acc_pos_begin == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); + + storage_url = endpoint.substr(0, acc_pos_begin); + size_t acc_pos_end = endpoint.find('/', acc_pos_begin + 1); + + if (acc_pos_end == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); + + account_name = endpoint.substr(acc_pos_begin + 1, acc_pos_end - acc_pos_begin - 1); + + size_t cont_pos_end = endpoint.find('/', acc_pos_end + 1); + + if (cont_pos_end != std::string::npos) + { + container_name = endpoint.substr(acc_pos_end + 1, cont_pos_end - acc_pos_end - 1); + prefix = endpoint.substr(cont_pos_end + 1); + } + else + { + container_name = endpoint.substr(acc_pos_end + 1); + } + } + else + { + size_t cont_pos_begin = endpoint.find('/', pos + 2); + + if (cont_pos_begin == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); + + storage_url = endpoint.substr(0, cont_pos_begin); + size_t cont_pos_end = endpoint.find('/', cont_pos_begin + 1); + + if (cont_pos_end != std::string::npos) + { + container_name = endpoint.substr(cont_pos_begin + 1,cont_pos_end - cont_pos_begin - 1); + prefix = endpoint.substr(cont_pos_end + 1); + } + else + { + container_name = endpoint.substr(cont_pos_begin + 1); + } + } + } + else if (config.has(config_prefix + ".connection_string")) + { + storage_url = config.getString(config_prefix + ".connection_string"); + container_name = get_container_name(); + } + else if (config.has(config_prefix + ".storage_account_url")) + { + storage_url = config.getString(config_prefix + ".storage_account_url"); + validateStorageAccountUrl(storage_url); + container_name = get_container_name(); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); + + if (!container_name.empty()) + validateContainerName(container_name); + + std::optional container_already_exists {}; + if (config.has(config_prefix + ".container_already_exists")) + container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; + + return {storage_url, account_name, 
container_name, prefix, "", container_already_exists}; +} + +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method) +{ + endpoint.container_name = container_name; + + if (isConnectionString(url)) + { + endpoint.storage_account_url = url; + auth_method = ConnectionString{url}; + return; + } + + auto pos = url.find('?'); + + /// If the connection URL does not have '?', then it's not SAS + if (pos == std::string::npos) + { + endpoint.storage_account_url = url; + auth_method = std::make_shared(); + } + else + { + endpoint.storage_account_url = url.substr(0, pos); + endpoint.sas_auth = url.substr(pos + 1); + auth_method = std::make_shared(); + } +} + +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly) +{ + if (params.endpoint.container_already_exists.value_or(false) || readonly) + return params.createForContainer(); + + try + { + auto service_client = params.createForService(); + return std::make_unique(service_client->CreateBlobContainer(params.endpoint.container_name).Value); + } + catch (const Azure::Storage::StorageException & e) + { + /// If container_already_exists is not set (in config), ignore already exists error. + /// (Conflict - The specified container already exists) + if (!params.endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) + return params.createForContainer(); + throw; + } +} + +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) + { + return std::make_shared( + config.getString(config_prefix + ".account_name"), + config.getString(config_prefix + ".account_key") + ); + } + + if (config.has(config_prefix + ".connection_string")) + return ConnectionString{config.getString(config_prefix + ".connection_string")}; + + if (config.getBool(config_prefix + ".use_workload_identity", false)) + return std::make_shared(); + + return std::make_shared(); +} + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk) +{ + Azure::Core::Http::Policies::RetryOptions retry_options; + retry_options.MaxRetries = static_cast(settings.sdk_max_retries); + retry_options.RetryDelay = std::chrono::milliseconds(settings.sdk_retry_initial_backoff_ms); + retry_options.MaxRetryDelay = std::chrono::milliseconds(settings.sdk_retry_max_backoff_ms); + + Azure::Core::Http::CurlTransportOptions curl_options; + curl_options.NoSignal = true; + curl_options.IPResolve = settings.curl_ip_resolve; + + Azure::Storage::Blobs::BlobClientOptions client_options; + client_options.Retry = retry_options; + client_options.Transport.Transport = std::make_shared(curl_options); + client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=for_disk}; + + return client_options; +} + +std::unique_ptr getRequestSettings(const Settings & query_settings) +{ + auto settings = std::make_unique(); + + settings->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; + settings->max_single_read_retries = query_settings.azure_max_single_read_retries; + settings->max_single_download_retries = query_settings.azure_max_single_read_retries; + settings->list_object_keys_size = query_settings.azure_list_object_keys_size; + settings->min_upload_part_size = query_settings.azure_min_upload_part_size; + settings->max_upload_part_size = 
query_settings.azure_max_upload_part_size; + settings->max_single_part_copy_size = query_settings.azure_max_single_part_copy_size; + settings->max_blocks_in_multipart_upload = query_settings.azure_max_blocks_in_multipart_upload; + settings->max_unexpected_write_error_retries = query_settings.azure_max_unexpected_write_error_retries; + settings->max_inflight_parts_for_one_file = query_settings.azure_max_inflight_parts_for_one_file; + settings->strict_upload_part_size = query_settings.azure_strict_upload_part_size; + settings->upload_part_size_multiply_factor = query_settings.azure_upload_part_size_multiply_factor; + settings->upload_part_size_multiply_parts_count_threshold = query_settings.azure_upload_part_size_multiply_parts_count_threshold; + settings->sdk_max_retries = query_settings.azure_sdk_max_retries; + settings->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; + settings->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; + + return settings; +} + +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy) +{ + auto settings = getRequestSettings(query_settings); + settings->use_native_copy = use_native_copy; + return settings; +} + +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +{ + auto settings = std::make_unique(); + const auto & settings_ref = context->getSettingsRef(); + + settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); + settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); + + settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", settings_ref.azure_max_single_part_upload_size); + settings->max_single_read_retries = config.getUInt64(config_prefix + ".max_single_read_retries", settings_ref.azure_max_single_read_retries); + settings->max_single_download_retries = config.getUInt64(config_prefix + ".max_single_download_retries", settings_ref.azure_max_single_read_retries); + settings->list_object_keys_size = config.getUInt64(config_prefix + ".list_object_keys_size", settings_ref.azure_list_object_keys_size); + settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", settings_ref.azure_min_upload_part_size); + settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", settings_ref.azure_max_upload_part_size); + settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", settings_ref.azure_max_single_part_copy_size); + settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", settings_ref.azure_max_blocks_in_multipart_upload); + settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", settings_ref.azure_max_unexpected_write_error_retries); + settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", settings_ref.azure_max_inflight_parts_for_one_file); + settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", settings_ref.azure_strict_upload_part_size); + settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", settings_ref.azure_upload_part_size_multiply_factor); + 
settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", settings_ref.azure_upload_part_size_multiply_parts_count_threshold); + + settings->sdk_max_retries = config.getUInt64(config_prefix + ".max_tries", settings_ref.azure_sdk_max_retries); + settings->sdk_retry_initial_backoff_ms = config.getUInt64(config_prefix + ".retry_initial_backoff_ms", settings_ref.azure_sdk_retry_initial_backoff_ms); + settings->sdk_retry_max_backoff_ms = config.getUInt64(config_prefix + ".retry_max_backoff_ms", settings_ref.azure_sdk_retry_max_backoff_ms); + + if (config.has(config_prefix + ".curl_ip_resolve")) + { + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + + auto value = config.getString(config_prefix + ".curl_ip_resolve"); + if (value == "ipv4") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V4; + else if (value == "ipv6") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V6; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. Expected one of 'ipv4' or 'ipv6'", value); + } + + return settings; +} + +} + +} + +#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h new file mode 100644 index 00000000000..19ba48ea225 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -0,0 +1,138 @@ +#pragma once +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace AzureBlobStorage +{ + +using ServiceClient = Azure::Storage::Blobs::BlobServiceClient; +using ContainerClient = Azure::Storage::Blobs::BlobContainerClient; +using BlobClient = Azure::Storage::Blobs::BlobClient; +using BlobClientOptions = Azure::Storage::Blobs::BlobClientOptions; + +struct RequestSettings +{ + RequestSettings() = default; + + size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset + size_t min_bytes_for_seek = 1024 * 1024; + size_t max_single_read_retries = 3; + size_t max_single_download_retries = 3; + size_t list_object_keys_size = 1000; + size_t min_upload_part_size = 16 * 1024 * 1024; + size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; + size_t max_single_part_copy_size = 256 * 1024 * 1024; + size_t max_unexpected_write_error_retries = 4; + size_t max_inflight_parts_for_one_file = 20; + size_t max_blocks_in_multipart_upload = 50000; + size_t strict_upload_part_size = 0; + size_t upload_part_size_multiply_factor = 2; + size_t upload_part_size_multiply_parts_count_threshold = 500; + size_t sdk_max_retries = 10; + size_t sdk_retry_initial_backoff_ms = 10; + size_t sdk_retry_max_backoff_ms = 1000; + bool use_native_copy = false; + + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + CurlOptions::CurlOptIPResolve curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_WHATEVER; +}; + +struct Endpoint +{ + String storage_account_url; + String account_name; + String container_name; + String prefix; + String sas_auth; + std::optional container_already_exists; + + String getEndpoint() const + { + String url = storage_account_url; + if (url.ends_with('/')) + url.pop_back(); + + if (!account_name.empty()) + url += "/" + account_name; + + if (!container_name.empty()) + url += "/" + container_name; 
+ + if (!prefix.empty()) + url += "/" + prefix; + + if (!sas_auth.empty()) + url += "?" + sas_auth; + + return url; + } + + String getEndpointWithoutContainer() const + { + String url = storage_account_url; + + if (!account_name.empty()) + url += "/" + account_name; + + if (!sas_auth.empty()) + url += "?" + sas_auth; + + return url; + } +}; + +using ConnectionString = StrongTypedef; + +using AuthMethod = std::variant< + ConnectionString, + std::shared_ptr, + std::shared_ptr, + std::shared_ptr>; + +struct ConnectionParams +{ + Endpoint endpoint; + AuthMethod auth_method; + BlobClientOptions client_options; + + String getContainer() const { return endpoint.container_name; } + String getConnectionURL() const; + + std::unique_ptr createForService() const; + std::unique_ptr createForContainer() const; +}; + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method); + +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly); + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk); +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + +std::unique_ptr getRequestSettings(const Settings & query_settings); +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy); +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +} + +} + +#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 0ebe885a3e7..bc16955143b 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include @@ -104,7 +104,7 @@ private: AzureObjectStorage::AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_, const String & description_) @@ -397,24 +397,49 @@ void AzureObjectStorage::copyObject( /// NOLINT } void AzureObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context, const ApplyNewSettingsOptions &) + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context, + const ApplyNewSettingsOptions & options) { - auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); settings.set(std::move(new_settings)); - /// We don't update client + + if (!options.allow_client_change) + return; + + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*settings.get(), is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + client.set(std::move(new_client)); } -std::unique_ptr 
AzureObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +std::unique_ptr AzureObjectStorage::cloneObjectStorage( + const std::string & new_namespace, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) { - return std::make_unique( - name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - object_namespace, - description - ); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*new_settings, is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + return std::make_unique(name, std::move(new_client), std::move(new_settings), new_namespace, params.endpoint.getEndpointWithoutContainer()); } } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index c342929d656..2c7ce5e18dc 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include namespace Poco { @@ -16,71 +18,15 @@ class Logger; namespace DB { -struct AzureObjectStorageSettings -{ - AzureObjectStorageSettings( - uint64_t max_single_part_upload_size_, - uint64_t min_bytes_for_seek_, - int max_single_read_retries_, - int max_single_download_retries_, - int list_object_keys_size_, - size_t min_upload_part_size_, - size_t max_upload_part_size_, - size_t max_single_part_copy_size_, - bool use_native_copy_, - size_t max_unexpected_write_error_retries_, - size_t max_inflight_parts_for_one_file_, - size_t strict_upload_part_size_, - size_t upload_part_size_multiply_factor_, - size_t upload_part_size_multiply_parts_count_threshold_) - : max_single_part_upload_size(max_single_part_upload_size_) - , min_bytes_for_seek(min_bytes_for_seek_) - , max_single_read_retries(max_single_read_retries_) - , max_single_download_retries(max_single_download_retries_) - , list_object_keys_size(list_object_keys_size_) - , min_upload_part_size(min_upload_part_size_) - , max_upload_part_size(max_upload_part_size_) - , max_single_part_copy_size(max_single_part_copy_size_) - , use_native_copy(use_native_copy_) - , max_unexpected_write_error_retries(max_unexpected_write_error_retries_) - , max_inflight_parts_for_one_file(max_inflight_parts_for_one_file_) - , strict_upload_part_size(strict_upload_part_size_) - , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) - , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) - { - } - - AzureObjectStorageSettings() = default; - - size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - uint64_t min_bytes_for_seek = 1024 * 1024; - size_t max_single_read_retries = 3; - size_t max_single_download_retries = 3; - int list_object_keys_size = 1000; - size_t min_upload_part_size = 16 * 
1024 * 1024; - size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; - size_t max_single_part_copy_size = 256 * 1024 * 1024; - bool use_native_copy = false; - size_t max_unexpected_write_error_retries = 4; - size_t max_inflight_parts_for_one_file = 20; - size_t max_blocks_in_multipart_upload = 50000; - size_t strict_upload_part_size = 0; - size_t upload_part_size_multiply_factor = 2; - size_t upload_part_size_multiply_parts_count_threshold = 500; -}; - -using AzureClient = Azure::Storage::Blobs::BlobContainerClient; -using AzureClientPtr = std::unique_ptr; - class AzureObjectStorage : public IObjectStorage { public: - - using SettingsPtr = std::unique_ptr; + using ClientPtr = std::unique_ptr; + using SettingsPtr = std::unique_ptr; AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_, const String & description_); @@ -159,12 +105,8 @@ public: bool isRemote() const override { return true; } - std::shared_ptr getSettings() { return settings.get(); } - - std::shared_ptr getAzureBlobStorageClient() override - { - return client.get(); - } + std::shared_ptr getSettings() const { return settings.get(); } + std::shared_ptr getAzureBlobStorageClient() const override { return client.get(); } bool supportParallelWrite() const override { return true; } @@ -174,8 +116,8 @@ private: const String name; /// client used to access the files in the Blob Storage cloud - MultiVersion client; - MultiVersion settings; + MultiVersion client; + MultiVersion settings; const String object_namespace; /// container + prefix /// We use source url without container and prefix as description, because in Azure there are no limitations for operations between different containers. diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 727dbeed853..93ef2659cbb 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -127,7 +127,7 @@ public: const FileCacheSettings & getCacheSettings() const { return cache_settings; } #if USE_AZURE_BLOB_STORAGE - std::shared_ptr getAzureBlobStorageClient() override + std::shared_ptr getAzureBlobStorageClient() const override { return object_storage->getAzureBlobStorageClient(); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 59cc82d8c81..5c45a258806 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -195,7 +195,6 @@ public: /// DiskObjectStorage(CachedObjectStorage(CachedObjectStorage(S3ObjectStorage))) String getStructure() const { return fmt::format("DiskObjectStorage-{}({})", getName(), object_storage->getName()); } -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// Add a cache layer. /// Example: DiskObjectStorage(S3ObjectStorage) -> DiskObjectStorage(CachedObjectStorage(S3ObjectStorage)) /// There can be any number of cache layers: @@ -204,7 +203,6 @@ public: /// Get names of all cache layers. Name is how cache is defined in configuration file. 
NameSet getCacheLayersNames() const override; -#endif bool supportsStat() const override { return metadata_storage->supportsStat(); } struct stat stat(const String & path) const override; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 44854633d65..56d5d11ef8a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -222,11 +222,7 @@ ObjectKeyWithMetadata DiskObjectStorageMetadata::popLastObject() bool DiskObjectStorageMetadata::getWriteFullObjectKeySetting() { -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD return Context::getGlobalContextInstance()->getServerSettings().storage_metadata_write_full_object_key; -#else - return false; -#endif } } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 9f5c14fdb7c..ceea4d5a2bb 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -75,6 +75,7 @@ struct RelativePathWithMetadata virtual std::string getPath() const { return relative_path; } virtual bool isArchive() const { return false; } virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } + virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } }; struct ObjectKeyWithMetadata @@ -260,7 +261,7 @@ public: virtual void setKeysGenerator(ObjectStorageKeysGeneratorPtr) { } #if USE_AZURE_BLOB_STORAGE - virtual std::shared_ptr getAzureBlobStorageClient() + virtual std::shared_ptr getAzureBlobStorageClient() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for AzureBlobStorage"); } diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.h b/src/Disks/ObjectStorages/MetadataOperationsHolder.h index 8997f40b9a2..a042f4bd8b9 100644 --- a/src/Disks/ObjectStorages/MetadataOperationsHolder.h +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index ab7c2069b43..a690ecd2757 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -2,9 +2,7 @@ #include #include #include -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include -#endif #include #include @@ -135,7 +133,6 @@ void registerPlainRewritableMetadataStorage(MetadataStorageFactory & factory) }); } -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & factory) { factory.registerMetadataStorageType("web", []( @@ -147,7 +144,6 @@ void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & fa return std::make_shared(assert_cast(*object_storage)); }); } -#endif void registerMetadataStorages() { @@ -155,9 +151,7 @@ void registerMetadataStorages() registerMetadataStorageFromDisk(factory); registerPlainMetadataStorage(factory); registerPlainRewritableMetadataStorage(factory); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerMetadataStorageFromStaticFilesWebServer(factory); -#endif } } diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 1bf8250adff..d9535358daf 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ 
b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -7,19 +7,17 @@ #include #include #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS #include #include #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE #include -#include +#include #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #include #include -#endif #include #include #include @@ -284,7 +282,7 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS void registerHDFSObjectStorage(ObjectStorageFactory & factory) { factory.registerObjectStorageType( @@ -309,7 +307,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) } #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE void registerAzureObjectStorage(ObjectStorageFactory & factory) { auto creator = []( @@ -319,21 +317,27 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) const ContextPtr & context, bool /* skip_access_check */) -> ObjectStoragePtr { - AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix); + auto azure_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*azure_settings, /*for_disk=*/ true), + }; return createObjectStorage( ObjectStorageType::Azure, config, config_prefix, name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix, - endpoint.getEndpointWithoutContainer()); + AzureBlobStorage::getContainerClient(params, /*readonly=*/ false), std::move(azure_settings), + params.endpoint.prefix.empty() ? 
params.endpoint.container_name : params.endpoint.container_name + "/" + params.endpoint.prefix, params.endpoint.getEndpointWithoutContainer()); }; + factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); } #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerWebObjectStorage(ObjectStorageFactory & factory) { factory.registerObjectStorageType("web", []( @@ -381,7 +385,6 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory) factory.registerObjectStorageType("local_blob_storage", creator); factory.registerObjectStorageType("local", creator); } -#endif void registerObjectStorages() { @@ -393,18 +396,16 @@ registerS3PlainRewritableObjectStorage(factory); #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS registerHDFSObjectStorage(factory); #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE registerAzureObjectStorage(factory); #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerWebObjectStorage(factory); registerLocalObjectStorage(factory); -#endif } } diff --git a/src/Disks/ObjectStorages/createMetadataStorageMetrics.h b/src/Disks/ObjectStorages/createMetadataStorageMetrics.h index 6dddc227ade..5cf1fbef2ab 100644 --- a/src/Disks/ObjectStorages/createMetadataStorageMetrics.h +++ b/src/Disks/ObjectStorages/createMetadataStorageMetrics.h @@ -1,14 +1,14 @@ #pragma once +#include "config.h" + #if USE_AWS_S3 # include #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE # include #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD -# include -#endif +#include #include namespace ProfileEvents { @@ -42,7 +42,7 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create inline MetadataStorageMetrics MetadataStorageMetrics::create() { @@ -53,7 +53,6 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create inline MetadataStorageMetrics MetadataStorageMetrics::create() { @@ -62,6 +61,5 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create(&isStringOrFixedString), nullptr, "String or FixedString"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_arguments); + validateFunctionArguments(*this, arguments, mandatory_arguments); return std::make_shared(); } diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 593646240ca..c658063b66f 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -95,22 +95,21 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName return res; } -void validateArgumentType(const IFunction & func, const DataTypes & arguments, - size_t argument_index, bool (* validator_func)(const IDataType &), - const char * expected_type_description) -{ - if (arguments.size() <= argument_index) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}", - func.getName()); - - const auto & argument = arguments[argument_index]; - if (!validator_func(*argument)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}", - argument->getName(), std::to_string(argument_index), func.getName(), expected_type_description); -} - namespace { + +String withOrdinalEnding(size_t i) +{ + switch (i) + { + case 0: return "1st"; + case 1: return "2nd"; + case 2: return "3rd"; + default: return std::to_string(i + 1) +
"th"; + } + +} + void validateArgumentsImpl(const IFunction & func, const ColumnsWithTypeAndName & arguments, size_t argument_offset, @@ -120,20 +119,18 @@ void validateArgumentsImpl(const IFunction & func, { const auto argument_index = i + argument_offset; if (argument_index >= arguments.size()) - { break; - } const auto & arg = arguments[i + argument_offset]; const auto & descriptor = descriptors[i]; if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0) throw Exception(error_code, - "Illegal type of argument #{}{} of function {}{}{}", - argument_offset + i + 1, // +1 is for human-friendly 1-based indexing - (descriptor.argument_name ? " '" + std::string(descriptor.argument_name) + "'" : String{}), + "A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}", + withOrdinalEnding(argument_offset + i), + descriptor.name, func.getName(), - (descriptor.expected_type_description ? String(", expected ") + descriptor.expected_type_description : String{}), - (arg.type ? ", got " + arg.type->getName() : String{})); + descriptor.type_name, + arg.type ? arg.type->getName() : ""); } } @@ -141,52 +138,42 @@ void validateArgumentsImpl(const IFunction & func, int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const ColumnPtr & column) const { - if (type_validator_func && (data_type == nullptr || !type_validator_func(*data_type))) + if (name.empty() || type_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "name or type_name are not set"); + + if (type_validator && (data_type == nullptr || !type_validator(*data_type))) return ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT; - if (column_validator_func && (column == nullptr || !column_validator_func(*column))) + if (column_validator && (column == nullptr || !column_validator(*column))) return ErrorCodes::ILLEGAL_COLUMN; return 0; } -void validateFunctionArgumentTypes(const IFunction & func, - const ColumnsWithTypeAndName & arguments, - const FunctionArgumentDescriptors & mandatory_args, - const FunctionArgumentDescriptors & optional_args) +void validateFunctionArguments(const IFunction & func, + const ColumnsWithTypeAndName & arguments, + const FunctionArgumentDescriptors & mandatory_args, + const FunctionArgumentDescriptors & optional_args) { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { - auto join_argument_types = [](const auto & args, const String sep = ", ") - { - String result; - for (const auto & a : args) - { - using A = std::decay_t; - if constexpr (std::is_same_v) - { - if (a.argument_name) - result += "'" + std::string(a.argument_name) + "' : "; - if (a.expected_type_description) - result += a.expected_type_description; - } - else if constexpr (std::is_same_v) - result += a.type->getName(); + auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? 
"argument" : "arguments"; }; - result += sep; - } - - if (!args.empty()) - result.erase(result.end() - sep.length(), result.end()); - - return result; - }; + String expected_args_string; + if (!mandatory_args.empty() && !optional_args.empty()) + expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)); + else if (!mandatory_args.empty() && optional_args.empty()) + expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments" + else if (mandatory_args.empty() && !optional_args.empty()) + expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)); + else + expected_args_string = "0 arguments"; throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Incorrect number of arguments for function {} provided {}{}, expected {}{} ({}{})", - func.getName(), arguments.size(), (!arguments.empty() ? " (" + join_argument_types(arguments) + ")" : String{}), - mandatory_args.size(), (!optional_args.empty() ? " to " + std::to_string(mandatory_args.size() + optional_args.size()) : ""), - join_argument_types(mandatory_args), (!optional_args.empty() ? ", [" + join_argument_types(optional_args) + "]" : "")); + "An incorrect number of arguments was specified for function '{}'. Expected {}, got {}", + func.getName(), + expected_args_string, + fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments))); } validateArgumentsImpl(func, arguments, 0, mandatory_args); diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 6267d8eacc4..4f93b236bcb 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -115,77 +115,58 @@ ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col); /// column if it is nullable. ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns); -/// Checks argument type at specified index with predicate. -/// throws if there is no argument at specified index or if predicate returns false. -void validateArgumentType(const IFunction & func, const DataTypes & arguments, - size_t argument_index, bool (* validator_func)(const IDataType &), - const char * expected_type_description); - -/** Simple validator that is used in conjunction with validateFunctionArgumentTypes() to check if function arguments are as expected - * - * Also it is used to generate function description when arguments do not match expected ones. - * Any field can be null: - * `argument_name` - if not null, reported via type check errors. - * `expected_type_description` - if not null, reported via type check errors. - * `type_validator_func` - if not null, used to validate data type of function argument. - * `column_validator_func` - if not null, used to validate column of function argument. - */ +/// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided +/// arguments match the expected arguments. struct FunctionArgumentDescriptor { - const char * argument_name; + /// The argument name, e.g. "longitude". + /// Should not be empty. + std::string_view name; + /// A function which validates the argument data type. + /// May be nullptr. 
using TypeValidator = bool (*)(const IDataType &); - TypeValidator type_validator_func; + TypeValidator type_validator; + + /// A function which validates the argument column. + /// May be nullptr. using ColumnValidator = bool (*)(const IColumn &); - ColumnValidator column_validator_func; + ColumnValidator column_validator; - const char * expected_type_description; + /// The expected argument type, e.g. "const String" or "UInt64". + /// Should not be empty. + std::string_view type_name; - /** Validate argument type and column. - * - * Returns non-zero error code if: - * Validator != nullptr && (Value == nullptr || Validator(*Value) == false) - * For: - * Validator is either `type_validator_func` or `column_validator_func` - * Value is either `data_type` or `column` respectively. - * ILLEGAL_TYPE_OF_ARGUMENT if type validation fails - * - */ + /// Validate argument type and column. int isValid(const DataTypePtr & data_type, const ColumnPtr & column) const; }; using FunctionArgumentDescriptors = std::vector; -/** Validate that function arguments match specification. - * - * Designed to simplify argument validation for functions with variable arguments - * (e.g. depending on result type or other trait). - * First, checks that number of arguments is as expected (including optional arguments). - * Second, checks that mandatory args present and have valid type. - * Third, checks optional arguments types, skipping ones that are missing. - * - * Please note that if you have several optional arguments, like f([a, b, c]), - * only these calls are considered valid: - * f(a) - * f(a, b) - * f(a, b, c) - * - * But NOT these: f(a, c), f(b, c) - * In other words you can't omit middle optional arguments (just like in regular C++). - * - * If any mandatory arg is missing, throw an exception, with explicit description of expected arguments. - */ -void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithTypeAndName & arguments, - const FunctionArgumentDescriptors & mandatory_args, - const FunctionArgumentDescriptors & optional_args = {}); +/// Validates that the user-provided arguments match the expected arguments. +/// +/// Checks that +/// - the number of provided arguments matches the number of mandatory/optional arguments, +/// - all mandatory arguments are present and have the right type, +/// - optional arguments - if present - have the right type. +/// +/// With multiple optional arguments, e.g. f([a, b, c]), provided arguments must match left-to-right. E.g. these calls are considered valid: +/// f(a) +/// f(a, b) +/// f(a, b, c) +/// but these are NOT: +/// f(a, c) +/// f(b, c) +void validateFunctionArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments, + const FunctionArgumentDescriptors & mandatory_args, + const FunctionArgumentDescriptors & optional_args = {}); /// Checks if a list of array columns have equal offsets. Return a pair of nested columns and offsets if true, otherwise throw. std::pair, const ColumnArray::Offset *> checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments); -/** Return ColumnNullable of src, with null map as OR-ed null maps of args columns. - * Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. - */ +/// Return ColumnNullable of src, with null map as OR-ed null maps of args columns. +/// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. 
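Because so many call sites in this diff migrate from validateFunctionArgumentTypes() to validateFunctionArguments(), a compact model of the descriptor mechanism may be useful. The sketch below is a self-contained toy with simplified types and hypothetical names (the real FunctionArgumentDescriptor also carries a column validator), but the control flow matches the comment above: the argument count is checked first, then each provided argument is validated, with optional arguments matched strictly left to right:

#include <iostream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

struct DataType { std::string name; };  // toy stand-in for IDataType

struct ArgumentDescriptor
{
    std::string_view name;                     /// e.g. "longitude"; reported in error messages
    bool (*type_validator)(const DataType &);  /// nullptr means any type is accepted
    std::string_view type_name;                /// e.g. "Float*"; reported in error messages
};

bool isFloat(const DataType & t) { return t.name.rfind("Float", 0) == 0; }

void validateArguments(
    std::string_view func,
    const std::vector<DataType> & args,
    const std::vector<ArgumentDescriptor> & mandatory,
    const std::vector<ArgumentDescriptor> & optional)
{
    /// Count check first, as in validateFunctionArguments().
    if (args.size() < mandatory.size() || args.size() > mandatory.size() + optional.size())
        throw std::invalid_argument("An incorrect number of arguments was specified for function '" + std::string(func) + "'");

    /// Then per-argument validation; optional descriptors apply left to right.
    for (size_t i = 0; i < args.size(); ++i)
    {
        const auto & d = i < mandatory.size() ? mandatory[i] : optional[i - mandatory.size()];
        if (d.type_validator && !d.type_validator(args[i]))
            throw std::invalid_argument(
                "A value of illegal type was provided as argument '" + std::string(d.name)
                + "'. Expected: " + std::string(d.type_name) + ", got: " + args[i].name);
    }
}

int main()
{
    std::vector<ArgumentDescriptor> mandatory{{"longitude", isFloat, "Float*"}, {"latitude", isFloat, "Float*"}};
    try
    {
        validateArguments("geohashEncode", {{"Float64"}, {"String"}}, mandatory, {});
    }
    catch (const std::invalid_argument & e)
    {
        std::cout << e.what() << '\n';  /// fails: latitude is a String, not Float*
    }
}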
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count); struct NullPresence diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index aee04a5969a..b4bcfa514a8 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -40,7 +40,7 @@ public: {"replacement", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index d6cf6a24983..0ca47126198 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -194,7 +194,7 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings( {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); + validateFunctionArguments(func, arguments, mandatory_args, optional_args); } static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments) @@ -207,7 +207,7 @@ static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & fun {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); + validateFunctionArguments(func, arguments, mandatory_args, optional_args); } } diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index c418163343b..e282bcfbfe2 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -47,7 +47,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 524b4f82acd..7af6265eba9 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -165,7 +165,7 @@ private: }); } - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{ {"mode", static_cast(&isStringOrFixedString), isColumnConst, "encryption mode string"}, {"input", static_cast(&isStringOrFixedString), {}, "plaintext"}, @@ -438,7 +438,7 @@ private: }); } - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{ {"mode", static_cast(&isStringOrFixedString), isColumnConst, "decryption mode string"}, {"input", static_cast(&isStringOrFixedString), {}, "ciphertext"}, diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 2a0b2f1d075..f3e54d2fbd9 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -2020,7 +2020,7 @@ public: DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}}; FunctionArgumentDescriptors optional_args; if constexpr (to_decimal) @@ -2049,7 +2049,7 @@ public: 
optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); } - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); if constexpr (std::is_same_v) { @@ -2390,7 +2390,7 @@ public: if (isDateTime64(arguments)) { - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, // optional FunctionArgumentDescriptors{ diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 08e257de8ac..6b65a5feaec 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -518,66 +518,78 @@ struct Dispatcher template static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & value_col_typed = checkAndGetColumn>(*value_col); - auto col_res = ColumnVector::create(); - - typename ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(value_col_typed.getData().size()); - - if (!vec_res.empty()) + // Non-const value argument: + const auto * value_col_typed = checkAndGetColumn>(value_col); + if (value_col_typed) { - if (scale_col == nullptr || isColumnConst(*scale_col)) - { - auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - if (scale_arg == 0) - { - size_t scale = 1; - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); - } - else if (scale_arg > 0) - { - size_t scale = intExp10(scale_arg); - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); - } - else - { - size_t scale = intExp10(-scale_arg); - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); - } - } - /// Non-const scale argument: - else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) - { - const auto & value_data = value_col_typed.getData(); - const auto & scale_data = scale_col_typed->getData(); - const size_t rows = value_data.size(); + auto col_res = ColumnVector::create(); - for (size_t i = 0; i < rows; ++i) - { - Int64 scale64 = scale_data[i]; - validateScale(scale64); - Scale raw_scale = scale64; + typename ColumnVector::Container & vec_res = col_res->getData(); + vec_res.resize(value_col_typed->getData().size()); - if (raw_scale == 0) + if (!vec_res.empty()) + { + // Const scale argument: + if (scale_col == nullptr || isColumnConst(*scale_col)) + { + auto scale_arg = (scale_col == nullptr) ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + if (scale_arg == 0) { size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); } - else if (raw_scale > 0) + else if (scale_arg > 0) { - size_t scale = intExp10(raw_scale); - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + size_t scale = intExp10(scale_arg); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); } else { - size_t scale = intExp10(-raw_scale); - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + size_t scale = intExp10(-scale_arg); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + } + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & value_data = value_col_typed->getData(); + const auto & scale_data = scale_col_typed->getData(); + const size_t rows = value_data.size(); + + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale_data[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + if (raw_scale == 0) + { + size_t scale = 1; + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else if (raw_scale > 0) + { + size_t scale = intExp10(raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else + { + size_t scale = intExp10(-raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } } } } + return col_res; } - - return col_res; + // Const value argument: + const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); + if (value_col_typed_const) + { + auto value_col_full = value_col_typed_const->convertToFullColumn(); + return apply(value_col_full.get(), scale_col); + } + return nullptr; } }; @@ -589,38 +601,52 @@ public: template static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & value_col_typed = checkAndGetColumn>(*value_col); - const typename ColumnDecimal::Container & vec_src = value_col_typed.getData(); - - auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed.getScale()); - auto & vec_res = col_res->getData(); - - if (!vec_res.empty()) + // Non-const value argument: + const auto * value_col_typed = checkAndGetColumn>(value_col); + if (value_col_typed) { - if (scale_col == nullptr || isColumnConst(*scale_col)) - { - auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - DecimalRoundingImpl::apply(value_col_typed.getData(), value_col_typed.getScale(), vec_res, scale_arg); - } - /// Non-const scale argument - else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) - { - const auto & scale = scale_col_typed->getData(); - const size_t rows = vec_src.size(); + const typename ColumnDecimal::Container & vec_src = value_col_typed->getData(); - for (size_t i = 0; i < rows; ++i) + auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed->getScale()); + auto & vec_res = col_res->getData(); + vec_res.resize(vec_src.size()); + + if (!vec_res.empty()) + { + /// Const scale argument: + if (scale_col == nullptr || isColumnConst(*scale_col)) { - Int64 scale64 = scale[i]; - validateScale(scale64); - Scale raw_scale = scale64; + auto scale_arg = scale_col == nullptr ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + DecimalRoundingImpl::apply(vec_src, value_col_typed->getScale(), vec_res, scale_arg); + } + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & scale = scale_col_typed->getData(); + const size_t rows = vec_src.size(); - DecimalRoundingImpl::applyOne(value_col_typed.getElement(i), value_col_typed.getScale(), - reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + DecimalRoundingImpl::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(), + reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + } } } - } - return col_res; + return col_res; + } + // Const value argument: + const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); + if (value_col_typed_const) + { + auto value_col_full = value_col_typed_const->convertToFullColumn(); + return apply(value_col_full.get(), scale_col); + } + return nullptr; } }; @@ -647,7 +673,7 @@ public: FunctionArgumentDescriptors optional_args{ {"N", static_cast(&isNativeInteger), nullptr, "The number of decimal places to round to"}, }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return arguments[0].type; } @@ -671,9 +697,6 @@ public: using ScaleTypes = std::decay_t; using ScaleType = typename ScaleTypes::RightType; - if (isColumnConst(*value_arg.column) && !isColumnConst(*scale_column.column)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale column must be const for const data column"); - res = Dispatcher::template apply(value_arg.column.get(), scale_column.column.get()); return true; }; diff --git a/src/Functions/JSONArrayLength.cpp b/src/Functions/JSONArrayLength.cpp index 84e87061398..73dd55f1266 100644 --- a/src/Functions/JSONArrayLength.cpp +++ b/src/Functions/JSONArrayLength.cpp @@ -48,7 +48,7 @@ namespace {"json", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index b6bd463212f..b317d786fab 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -23,7 +23,11 @@ namespace ProfileEvents { -extern const Event RegexpCreated; + extern const Event RegexpWithMultipleNeedlesCreated; + extern const Event RegexpWithMultipleNeedlesGlobalCacheHit; + extern const Event RegexpWithMultipleNeedlesGlobalCacheMiss; + extern const Event RegexpLocalCacheHit; + extern const Event RegexpLocalCacheMiss; } @@ -72,18 +76,28 @@ public: Bucket & bucket = known_regexps[hasher(pattern) % CACHE_SIZE]; if (bucket.regexp == nullptr) [[unlikely]] + { /// insert new entry + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); bucket = {pattern, std::make_shared(createRegexp(pattern))}; + } else + { if (pattern != bucket.pattern) + { /// replace existing entry + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); bucket = {pattern, std::make_shared(createRegexp(pattern))}; + } + else + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheHit); + } return bucket.regexp; } private: - constexpr static size_t CACHE_SIZE = 100; /// collision probability + constexpr static size_t CACHE_SIZE = 1'000; /// collision probability 
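The per-thread regexp cache shown above is intentionally minimal: a fixed array of single-entry buckets indexed by the pattern's hash, where a colliding pattern simply overwrites the bucket. This hunk grows the table from 100 to 1'000 buckets to lower the collision probability and adds hit/miss ProfileEvents. A self-contained sketch of the same idea, with plain counters standing in for ProfileEvents and a string standing in for the compiled regexp (illustrative names, not the real Regexps.h types):

#include <array>
#include <functional>
#include <iostream>
#include <memory>
#include <string>

struct CompiledPattern { std::string source; };  // stand-in for a compiled regexp

class LocalPatternCache
{
public:
    std::shared_ptr<CompiledPattern> getOrSet(const std::string & pattern)
    {
        Bucket & bucket = buckets[hasher(pattern) % CACHE_SIZE];
        if (!bucket.regexp || bucket.pattern != pattern)
        {
            ++misses;  // empty bucket or hash collision: (re)compile and replace
            bucket = {pattern, std::make_shared<CompiledPattern>(CompiledPattern{pattern})};
        }
        else
            ++hits;    // same pattern already cached in this bucket
        return bucket.regexp;
    }

    size_t hits = 0;
    size_t misses = 0;

private:
    static constexpr size_t CACHE_SIZE = 1'000;  // more buckets, fewer collisions

    struct Bucket
    {
        std::string pattern;
        std::shared_ptr<CompiledPattern> regexp;
    };

    std::hash<std::string> hasher;
    std::array<Bucket, CACHE_SIZE> buckets;
};

int main()
{
    LocalPatternCache cache;
    cache.getOrSet("a.*b");
    cache.getOrSet("a.*b");
    std::cout << cache.hits << " hit(s), " << cache.misses << " miss(es)\n";  // 1 hit(s), 1 miss(es)
}

Replacing on collision keeps lookups O(1) with no eviction bookkeeping; the only cost of a collision is one recompilation.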
std::hash hasher; struct Bucket @@ -244,7 +258,7 @@ inline Regexps constructRegexps(const std::vector & str_patterns, [[mayb throw Exception(ErrorCodes::BAD_ARGUMENTS, "Pattern '{}' failed with error '{}'", str_patterns[error->expression], String(error->message)); } - ProfileEvents::increment(ProfileEvents::RegexpCreated); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesCreated); /// We allocate the scratch space only once, then copy it across multiple threads with hs_clone_scratch /// function which is faster than allocating scratch space each time in each thread. @@ -322,9 +336,11 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else + { if (bucket.patterns != str_patterns || bucket.edit_distance != edit_distance) { /// replace existing entry @@ -333,8 +349,12 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } + else + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheHit); + } return bucket.regexps; } diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp index c08f41f06ee..0f565df8172 100644 --- a/src/Functions/URL/URLHierarchy.cpp +++ b/src/Functions/URL/URLHierarchy.cpp @@ -32,7 +32,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp index 7c796116b8d..2cb5995e375 100644 --- a/src/Functions/URL/URLPathHierarchy.cpp +++ b/src/Functions/URL/URLPathHierarchy.cpp @@ -30,7 +30,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp index 16ace36d39b..b3d51d02162 100644 --- a/src/Functions/URL/extractURLParameterNames.cpp +++ b/src/Functions/URL/extractURLParameterNames.cpp @@ -30,7 +30,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp index 43079834872..ce2aadaeede 100644 --- a/src/Functions/URL/extractURLParameters.cpp +++ b/src/Functions/URL/extractURLParameters.cpp @@ -31,7 +31,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {} diff --git a/src/Functions/array/FunctionsMapMiscellaneous.cpp b/src/Functions/array/FunctionsMapMiscellaneous.cpp 
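The global cache in the hunk just above stores DeferredConstructedRegexps entries rather than compiled databases: the bucket keeps a construction lambda, and the costly Hyperscan compilation happens only when a query first calls get(), at most once even under concurrent access (the new GlobalCacheHit/GlobalCacheMiss events count bucket hits, not compilations). A rough, self-contained sketch of that deferred-construction idea, with hypothetical names rather than the real classes:

#include <functional>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>

// The cache stores a thunk; the expensive work runs at most once, on first
// use, even if several threads race for the same entry.
template <typename T>
class Deferred
{
public:
    explicit Deferred(std::function<T()> construct_) : construct(std::move(construct_)) {}

    const T & get()
    {
        std::call_once(flag, [this] { value = std::make_unique<T>(construct()); });
        return *value;
    }

private:
    std::once_flag flag;
    std::function<T()> construct;
    std::unique_ptr<T> value;
};

int main()
{
    Deferred<std::string> compiled([] { std::cout << "compiling\n"; return std::string("<database>"); });
    compiled.get();                      // prints "compiling" exactly once
    std::cout << compiled.get() << '\n'; // reuses the cached result
}

Storing a thunk keeps cache insertion cheap; the lambda-based getOrSet in the diff suggests the same motivation of moving compilation off the path that populates the bucket.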
index 76c1ec18171..c3586a57161 100644 --- a/src/Functions/array/FunctionsMapMiscellaneous.cpp +++ b/src/Functions/array/FunctionsMapMiscellaneous.cpp @@ -51,6 +51,8 @@ public: bool isVariadic() const override { return impl.isVariadic(); } size_t getNumberOfArguments() const override { return impl.getNumberOfArguments(); } + bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); } + bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns(); } bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants(); } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; } @@ -184,7 +186,7 @@ struct MapToNestedAdapter : public MapAdapterBase struct MapToSubcolumnAdapter { - static_assert(position <= 1); + static_assert(position <= 1, "position of Map subcolumn must be 0 or 1"); static void extractNestedTypes(DataTypes & types) { @@ -357,7 +359,7 @@ struct NameMapValues { static constexpr auto name = "mapValues"; }; using FunctionMapValues = FunctionMapToArrayAdapter, NameMapValues>; struct NameMapContains { static constexpr auto name = "mapContains"; }; -using FunctionMapContains = FunctionMapToArrayAdapter, MapToSubcolumnAdapter, NameMapContains>; +using FunctionMapContains = FunctionMapToArrayAdapter, MapToSubcolumnAdapter, NameMapContains>; struct NameMapFilter { static constexpr auto name = "mapFilter"; }; using FunctionMapFilter = FunctionMapToArrayAdapter, NameMapFilter>; diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 87f3390ac73..7db20667888 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -87,7 +87,7 @@ public: {"array_1", static_cast(&isArray), nullptr, "Array"}, {"array_2", static_cast(&isArray), nullptr, "Array"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared>(); } diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp index b08a73b93f3..6e176b6e33d 100644 --- a/src/Functions/array/arrayRandomSample.cpp +++ b/src/Functions/array/arrayRandomSample.cpp @@ -39,7 +39,7 @@ public: {"array", static_cast(&isArray), nullptr, "Array"}, {"samples", static_cast(&isUInt), isColumnConst, "const UInt*"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); // Return an array with the same nested type as the input array const DataTypePtr & array_type = arguments[0].type; diff --git a/src/Functions/array/arrayShingles.cpp b/src/Functions/array/arrayShingles.cpp index 8932482c69c..7c97d8136fb 100644 --- a/src/Functions/array/arrayShingles.cpp +++ b/src/Functions/array/arrayShingles.cpp @@ -31,7 +31,7 @@ public: {"array", static_cast(&isArray), nullptr, "Array"}, {"length", static_cast(&isInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); const DataTypeArray * array_type = checkAndGetDataType(arguments[0].type.get()); return std::make_shared(std::make_shared(array_type->getNestedType())); diff --git a/src/Functions/arrayStringConcat.cpp b/src/Functions/arrayStringConcat.cpp index 421408c01f2..12bab410fec 100644 --- a/src/Functions/arrayStringConcat.cpp +++ 
b/src/Functions/arrayStringConcat.cpp @@ -159,7 +159,7 @@ public: {"separator", static_cast(&isString), isColumnConst, "const String"}, }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index c366a1ecb44..d561430d51f 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -5,6 +5,7 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; } @@ -24,6 +25,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); + else if (b < 0 || static_cast(b) > 8 * sizeof(A)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less than or equal to the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -37,9 +40,12 @@ struct BitShiftLeftImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; - /// To prevent overflow - if (static_cast(b) >= (static_cast(end - pos) * word_size) || b < 0) + const UInt8 word_size = 8 * sizeof(*pos); + size_t n = end - pos; + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less than or equal to the bit width of the value to shift"); + else if (b == bit_limit) { // insert default value out_vec.push_back(0); @@ -102,10 +108,12 @@ struct BitShiftLeftImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size) || b < 0) + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less than or equal to the bit width of the value to shift"); + else if (b == bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 1c37cd3bf4c..05b8581c792 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -6,6 +6,7 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; } @@ -25,6 +26,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); + else if (b < 0 || static_cast(b) > 8 * sizeof(A)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less than or equal to the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -53,9 +56,12 @@ struct BitShiftRightImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not
implemented for big integers as second argument"); else { - UInt8 word_size = 8; - /// To prevent overflow - if (static_cast(b) >= (static_cast(end - pos) * word_size) || b < 0) + const UInt8 word_size = 8; + size_t n = end - pos; + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less than or equal to the bit width of the value to shift"); + else if (b == bit_limit) { /// insert default value out_vec.push_back(0); @@ -90,10 +96,12 @@ struct BitShiftRightImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size) || b < 0) + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less than or equal to the bit width of the value to shift"); + else if (b == bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 44b39811882..995b5fa91e7 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -203,7 +203,7 @@ private: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}}; FunctionArgumentDescriptors optional_args; if (isDecimal(type) || isDateTime64(type)) @@ -212,9 +212,9 @@ private: if (isDateTimeOrDateTime64(type)) optional_args.push_back({"timezone", static_cast(&isString), isColumnConst, "const String"}); - optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); + optional_args.push_back({"default_value", nullptr, nullptr, "any type"}); - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); size_t additional_argument_index = 1; diff --git a/src/Functions/countMatches.h b/src/Functions/countMatches.h index fbbb9d017ee..5f07b936e26 100644 --- a/src/Functions/countMatches.h +++ b/src/Functions/countMatches.h @@ -38,7 +38,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, {"pattern", static_cast(&isString), isColumnConst, "constant String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/dateTimeToSnowflakeID.cpp b/src/Functions/dateTimeToSnowflakeID.cpp index 968a7628ca5..c48f8c13152 100644 --- a/src/Functions/dateTimeToSnowflakeID.cpp +++ b/src/Functions/dateTimeToSnowflakeID.cpp @@ -43,7 +43,7 @@ public: FunctionArgumentDescriptors optional_args{ {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); return std::make_shared(); } @@ -91,7 +91,7 @@ public: FunctionArgumentDescriptors optional_args{ {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; -
validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); return std::make_shared(); } diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index 5801a7b8f4f..4a3eb32474c 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -59,7 +59,7 @@ public: {"pattern", static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index dfcd0e31715..7732855b211 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -74,7 +74,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); /// Two-dimensional array of strings, each `row` of top array represents matching groups. return std::make_shared(std::make_shared(std::make_shared())); diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index f62352af0bd..ac6266a2e82 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -48,7 +48,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 3b632147864..655ea2e7cde 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -54,7 +54,7 @@ public: FunctionArgumentDescriptors args{ {"query", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); DataTypePtr string_type = std::make_shared(); if (error_handling == ErrorHandling::Null) diff --git a/src/Functions/fromDaysSinceYearZero.cpp b/src/Functions/fromDaysSinceYearZero.cpp index b98c587d172..0543e6bf229 100644 --- a/src/Functions/fromDaysSinceYearZero.cpp +++ b/src/Functions/fromDaysSinceYearZero.cpp @@ -54,7 +54,7 @@ public: { FunctionArgumentDescriptors args{{"days", static_cast(&isNativeInteger), nullptr, "Integer"}}; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 8ac010deafc..a171b6bf86e 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -167,7 +167,7 @@ public: FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index b0fec43fe94..a928f9009c8 100644 --- 
a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -30,9 +30,9 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "any type"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index b226c0840f4..5dc6f1cde32 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -11,20 +11,6 @@ namespace /* Bit layouts of UUIDv7 -without counter: - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | ver | rand_a | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -|var| rand_b | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| rand_b | -└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ - -with counter: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ @@ -163,7 +149,7 @@ public: FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index b2454f5dffc..96ad7dacfc4 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -38,9 +38,12 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); + FunctionArgumentDescriptors args{ + {"encoded", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} + }; + validateFunctionArguments(*this, arguments, args); return std::make_shared( DataTypes{std::make_shared(), std::make_shared()}, diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 7c353b822aa..034c8188b63 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } namespace @@ -40,19 +39,16 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - if (arguments.size() == 3) - { - 
validateArgumentType(*this, arguments, 2, isInteger, "integer"); - } - if (arguments.size() > 3) - { - throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Too many arguments for function {} expected at most 3", - getName()); - } + FunctionArgumentDescriptors mandatory_args{ + {"longitude", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude", static_cast(&isFloat), nullptr, "Float*"} + }; + FunctionArgumentDescriptors optional_args{ + {"precision", static_cast(&isInteger), nullptr, "(U)Int*"} + }; + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/geohashesInBox.cpp b/src/Functions/geohashesInBox.cpp index ac8d4a6ad8f..9429903dda7 100644 --- a/src/Functions/geohashesInBox.cpp +++ b/src/Functions/geohashesInBox.cpp @@ -35,22 +35,25 @@ public: size_t getNumberOfArguments() const override { return 5; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - validateArgumentType(*this, arguments, 2, isFloat, "float"); - validateArgumentType(*this, arguments, 3, isFloat, "float"); - validateArgumentType(*this, arguments, 4, isUInt8, "integer"); + FunctionArgumentDescriptors args{ + {"longitude_min", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude_min", static_cast(&isFloat), nullptr, "Float*"}, + {"longitude_max", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude_max", static_cast(&isFloat), nullptr, "Float*"}, + {"precision", static_cast(&isUInt8), nullptr, "UInt8"} + }; + validateFunctionArguments(*this, arguments, args); - if (!(arguments[0]->equals(*arguments[1]) && - arguments[0]->equals(*arguments[2]) && - arguments[0]->equals(*arguments[3]))) + if (!(arguments[0].type->equals(*arguments[1].type) && + arguments[0].type->equals(*arguments[2].type) && + arguments[0].type->equals(*arguments[3].type))) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of {} all coordinate arguments must have the same type, " - "instead they are:{}, {}, {}, {}.", getName(), arguments[0]->getName(), - arguments[1]->getName(), arguments[2]->getName(), arguments[3]->getName()); + "instead they are: {}, {}, {}, {}.", getName(), arguments[0].type->getName(), + arguments[1].type->getName(), arguments[2].type->getName(), arguments[3].type->getName()); } return std::make_shared(std::make_shared()); diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 3d8b8617472..41a09793994 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -87,7 +87,7 @@ public: {mandatory_argument_names_year_month_day[1], static_cast(&isNumber), nullptr, "Number"}, {mandatory_argument_names_year_month_day[2], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); } else { @@ -95,7 +95,7 @@ public: {mandatory_argument_names_year_dayofyear[0], static_cast(&isNumber), nullptr, "Number"}, {mandatory_argument_names_year_dayofyear[1], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); } return std::make_shared(); @@ -193,7 +193,7 @@ public: {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; -
validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -357,7 +357,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional timezone argument std::string timezone; @@ -440,7 +440,7 @@ public: {optional_argument_names[2], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); if (arguments.size() >= mandatory_argument_names.size() + 1) { @@ -572,7 +572,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional timezone argument std::string timezone; @@ -652,7 +652,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional precision argument auto precision = DEFAULT_PRECISION; diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 162b8c58873..339eb4cb26c 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -589,7 +589,7 @@ namespace {"timezone", static_cast(&isString), &isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String time_zone_name = getTimeZone(arguments).getTimeZone(); DataTypePtr date_type = std::make_shared(time_zone_name); diff --git a/src/Functions/parseReadableSize.cpp b/src/Functions/parseReadableSize.cpp index f5c2c53439b..1abcf7f164f 100644 --- a/src/Functions/parseReadableSize.cpp +++ b/src/Functions/parseReadableSize.cpp @@ -68,7 +68,7 @@ public: { {"readable_size", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); DataTypePtr return_type = std::make_shared(); if constexpr (error_handling == ErrorHandling::Null) return std::make_shared(return_type); diff --git a/src/Functions/regexpExtract.cpp b/src/Functions/regexpExtract.cpp index cfb42580cb0..3cc5393296c 100644 --- a/src/Functions/regexpExtract.cpp +++ b/src/Functions/regexpExtract.cpp @@ -54,7 +54,7 @@ public: if (arguments.size() == 3) args.emplace_back(FunctionArgumentDescriptor{"index", static_cast(&isInteger), nullptr, "Integer"}); - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 7f2fe646062..aa90bf2490d 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -201,7 +201,7 @@ public: {"n", static_cast(&isInteger), nullptr, "Integer"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/seriesDecomposeSTL.cpp 
b/src/Functions/seriesDecomposeSTL.cpp index 618808b64ed..720aa1e0799 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -45,7 +45,7 @@ public: {"time_series", static_cast(&isArray), nullptr, "Array"}, {"period", static_cast(&isNativeUInt), nullptr, "Unsigned Integer"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared(std::make_shared())); } diff --git a/src/Functions/seriesOutliersDetectTukey.cpp b/src/Functions/seriesOutliersDetectTukey.cpp index 81fc904e16e..4063d0ab85b 100644 --- a/src/Functions/seriesOutliersDetectTukey.cpp +++ b/src/Functions/seriesOutliersDetectTukey.cpp @@ -51,7 +51,7 @@ public: {"max_percentile", static_cast(&isFloat), isColumnConst, "Number"}, {"k", static_cast(&isNativeNumber), isColumnConst, "Number"}}; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index e85b3a97c67..471354235d5 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -53,7 +53,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 5ff8a636058..31ea6a28ece 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -64,7 +64,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime), nullptr, "DateTime"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -121,7 +121,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String timezone; if (arguments.size() == 2) @@ -190,7 +190,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -255,7 +255,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String timezone; if (arguments.size() == 2) diff --git a/src/Functions/snowflakeIDToDateTime.cpp b/src/Functions/snowflakeIDToDateTime.cpp index b799792a56f..9a1d5b8a74b 100644 --- a/src/Functions/snowflakeIDToDateTime.cpp +++ b/src/Functions/snowflakeIDToDateTime.cpp @@ -56,7 +56,7 @@ public: {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, 
optional_args); String timezone; if (arguments.size() == 3) @@ -127,7 +127,7 @@ public: {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); String timezone; if (arguments.size() == 3) diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 83183c991bc..ce12f2f541c 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -48,7 +48,7 @@ public: {"n", static_cast(&isInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index 6679646fef4..0e133590b84 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -100,7 +100,7 @@ public: FunctionArgumentDescriptors args{ {"sqid", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/timestamp.cpp b/src/Functions/timestamp.cpp index fbca08b0968..6f2bd2030d5 100644 --- a/src/Functions/timestamp.cpp +++ b/src/Functions/timestamp.cpp @@ -46,7 +46,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(DATETIME_SCALE); } diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index fc621b272de..4ee664ad237 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -43,7 +43,7 @@ public: {"precision", static_cast(&isNativeInteger), &isColumnConst, "const Integer"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); + validateFunctionArguments(*this, arguments, mandatory_args, {}); return std::make_shared(); } diff --git a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp index 2a979f500f7..407977f1f13 100644 --- a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp +++ b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp @@ -1,5 +1,7 @@ #include +#include +#include namespace DB { diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 8bd436f218c..128df415197 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -16,10 +16,12 @@ namespace ProfileEvents { extern const Event AzureCopyObject; - extern const Event AzureUploadPart; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; extern const Event DiskAzureCopyObject; - extern const Event DiskAzureUploadPart; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; } @@ -45,7 +47,7 @@ namespace size_t total_size_, const String & dest_container_for_logging_, const String & dest_blob_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_, const Poco::Logger * log_) : create_read_buffer(create_read_buffer_) @@ -70,7 +72,7 @@ namespace size_t total_size; const String & 
dest_container_for_logging; const String & dest_blob; - std::shared_ptr settings; + std::shared_ptr settings; ThreadPoolCallbackRunnerUnsafe schedule; const Poco::Logger * log; size_t max_single_part_upload_size; @@ -156,6 +158,10 @@ namespace void completeMultipartUpload() { auto block_blob_client = client->GetBlockBlobClient(dest_blob); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + block_blob_client.CommitBlockList(block_ids); } @@ -259,9 +265,9 @@ namespace void processUploadPartRequest(UploadPartTask & task) { - ProfileEvents::increment(ProfileEvents::AzureUploadPart); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); if (client->GetClickhouseOptions().IsClientForDisk) - ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); auto block_blob_client = client->GetBlockBlobClient(dest_blob); auto read_buffer = std::make_unique(create_read_buffer(), task.part_offset, task.part_size); @@ -312,7 +318,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr dest_client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule) { UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; @@ -329,11 +335,10 @@ void copyAzureBlobStorageFile( size_t size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { - if (settings->use_native_copy) { LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 9c20ee4cff0..c8e48fcd372 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -28,7 +28,7 @@ void copyAzureBlobStorageFile( size_t src_size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); @@ -45,7 +45,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); } diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 5c9a69893df..0ec733f7840 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -4,12 +4,15 @@ #include #include +#include #include #include #include +#include "config.h" + namespace ProfileEvents { @@ -41,10 +44,13 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; + [[maybe_unused]] bool allow_gwp_asan_force_sample; + Memory() = default; /// If alignment != 0, then allocate memory aligned to specified value. 
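+ /// The new allow_gwp_asan_force_sample flag lets IO buffers opt in to forced GWP-ASan
+ /// sampling: when it is set and GWPAsan::shouldForceSample() fires, alloc() arms GWP-ASan
+ /// (NextSampleCounter = 1) so that the very next allocation is sampled (see the
+ /// USE_GWP_ASAN hunk below).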
- explicit Memory(size_t size_, size_t alignment_ = 0) : alignment(alignment_) + explicit Memory(size_t size_, size_t alignment_ = 0, bool allow_gwp_asan_force_sample_ = false) + : alignment(alignment_), allow_gwp_asan_force_sample(allow_gwp_asan_force_sample_) { alloc(size_); } @@ -127,6 +133,11 @@ private: ProfileEvents::increment(ProfileEvents::IOBufferAllocs); ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, new_capacity); +#if USE_GWP_ASAN + if (unlikely(allow_gwp_asan_force_sample && GWPAsan::shouldForceSample())) + gwp_asan::getThreadLocals()->NextSampleCounter = 1; +#endif + m_data = static_cast(Allocator::alloc(new_capacity, alignment)); m_capacity = new_capacity; m_size = new_size; @@ -154,7 +165,7 @@ protected: public: /// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership. explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) - : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment) + : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment, /*allow_gwp_asan_force_sample_=*/true) { Base::set(existing_memory ? existing_memory : memory.data(), size); Base::padded = !existing_memory; diff --git a/src/IO/S3/BlobStorageLogWriter.cpp b/src/IO/S3/BlobStorageLogWriter.cpp index c2f0cb86928..d3b97771790 100644 --- a/src/IO/S3/BlobStorageLogWriter.cpp +++ b/src/IO/S3/BlobStorageLogWriter.cpp @@ -56,7 +56,6 @@ void BlobStorageLogWriter::addEvent( BlobStorageLogWriterPtr BlobStorageLogWriter::create(const String & disk_name) { -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// Keeper standalone build doesn't have a context if (auto blob_storage_log = Context::getGlobalContextInstance()->getBlobStorageLog()) { auto log_writer = std::make_shared(std::move(blob_storage_log)); @@ -67,7 +66,6 @@ BlobStorageLogWriterPtr BlobStorageLogWriter::create(const String & disk_name) return log_writer; } -#endif return {}; } diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index e654d091561..dd038948adf 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -134,7 +134,7 @@ std::unique_ptr WriteBufferToFileSegment::getReadBufferImpl() if (file_segment->getDownloadedSize() > 0) return std::make_unique(file_segment->getPath()); else - return std::make_unique(); + return std::make_unique(); } } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index f9b91a45978..d3f152b7a67 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1,7 +1,5 @@ #pragma once -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - #include #include #include @@ -1451,9 +1449,3 @@ struct HTTPContext : public IHTTPContext }; } - -#else - -#include - -#endif diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index aaec94a4fb0..841decf29c5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -274,10 +274,12 @@ void DatabaseCatalog::shutdownImpl() database->shutdown(); } + TablesMarkedAsDropped tables_marked_dropped_to_destroy; { std::lock_guard lock(tables_marked_dropped_mutex); - tables_marked_dropped.clear(); + tables_marked_dropped.swap(tables_marked_dropped_to_destroy); } + tables_marked_dropped_to_destroy.clear(); std::lock_guard 
lock(databases_mutex); for (const auto & db : databases) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a990eb651ce..0ee2bb6c0e9 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1328,7 +1328,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (database && database->getEngineName() == "Replicated" && create.select) + bool allow_heavy_create = getContext()->getSettingsRef().database_replicated_allow_heavy_create; + if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; if (create.storage && create.storage->engine) @@ -1338,11 +1339,12 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) is_storage_replicated = true; } - const bool allow_create_select_for_replicated = create.isView() || create.is_create_empty || !is_storage_replicated; + const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; if (!allow_create_select_for_replicated) throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT is not supported with Replicated databases. Use separate CREATE and INSERT queries"); + "CREATE AS SELECT and POPULATE are not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " + "Alternatively, you can enable the 'database_replicated_allow_heavy_create' setting to allow this operation; use with caution"); } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 9cfb8e486cb..39d5d9e9cef 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -61,24 +61,7 @@ BlockIO InterpreterDeleteQuery::execute() auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - if (table->supportsDelete()) - { - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; - - mut_command.type = MutationCommand::Type::DELETE; - mut_command.predicate = delete_query.predicate; - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter::Settings settings(false); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); - table->mutate(mutation_commands, getContext()); - return {}; - } - else if (table->supportsLightweightDelete()) + auto lightweightDelete = [&]() { if (!getContext()->getSettingsRef().enable_lightweight_delete) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, @@ -105,17 +88,77 @@ BlockIO InterpreterDeleteQuery::execute() context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); InterpreterAlterQuery alter_interpreter(alter_ast, context); return alter_interpreter.execute(); + }; + + if (table->supportsDelete()) + { + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; + + mut_command.type = MutationCommand::Type::DELETE; + mut_command.predicate =
delete_query.predicate; + + mutation_commands.emplace_back(mut_command); + + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter::Settings settings(false); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); + table->mutate(mutation_commands, getContext()); + return {}; + } + else if (table->supportsLightweightDelete()) + { + return lightweightDelete(); } else { - /// Currently just better exception for the case of a table with projection, - /// can act differently according to the setting. if (table->hasProjection()) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - table->getStorageID().getFullTableName()); + auto context = Context::createCopy(getContext()); + auto mode = context->getSettingsRef().lightweight_mutation_projection_mode; + if (mode == LightweightMutationProjectionMode::THROW) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + else if (mode == LightweightMutationProjectionMode::DROP) + { + std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); + + context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); + + /// Drop projections first so that lightweight delete can be performed. + for (const auto & projection : all_projections) + { + String alter_query = + "ALTER TABLE " + table->getStorageID().getFullTableName() + + (delete_query.cluster.empty() ? 
"" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) + + " DROP PROJECTION IF EXISTS " + projection; + + ParserAlterQuery parser; + ASTPtr alter_ast = parseQuery( + parser, + alter_query.data(), + alter_query.data() + alter_query.size(), + "ALTER query", + 0, + DBMS_DEFAULT_MAX_PARSER_DEPTH, + DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + InterpreterAlterQuery alter_interpreter(alter_ast, context); + alter_interpreter.execute(); + } + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed."); + } + + return lightweightDelete(); } throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 3a06e1b2301..7c7b4b3f95a 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -43,6 +43,7 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } namespace @@ -170,6 +171,7 @@ struct QueryASTSettings struct QueryTreeSettings { bool run_passes = true; + bool dump_tree = true; bool dump_passes = false; bool dump_ast = false; Int64 passes = -1; @@ -179,6 +181,7 @@ struct QueryTreeSettings std::unordered_map> boolean_settings = { {"run_passes", run_passes}, + {"dump_tree", dump_tree}, {"dump_passes", dump_passes}, {"dump_ast", dump_ast} }; @@ -398,7 +401,11 @@ QueryPipeline InterpreterExplainQuery::executeImpl() throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN QUERY TREE query"); auto settings = checkAndGetSettings(ast.getSettings()); + if (!settings.dump_tree && !settings.dump_ast) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'dump_tree' or 'dump_ast' must be set for EXPLAIN QUERY TREE query"); + auto query_tree = buildQueryTree(ast.getExplainedQuery(), getContext()); + bool need_newline = false; if (settings.run_passes) { @@ -410,23 +417,26 @@ QueryPipeline InterpreterExplainQuery::executeImpl() if (settings.dump_passes) { query_tree_pass_manager.dump(buf, pass_index); - if (pass_index > 0) - buf << '\n'; + need_newline = true; } query_tree_pass_manager.run(query_tree, pass_index); + } + + if (settings.dump_tree) + { + if (need_newline) + buf << "\n\n"; query_tree->dumpTree(buf); - } - else - { - query_tree->dumpTree(buf); + need_newline = true; } if (settings.dump_ast) { - buf << '\n'; - buf << '\n'; + if (need_newline) + buf << "\n\n"; + query_tree->toAST()->format(IAST::FormatSettings(buf, false)); } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp deleted file mode 100644 index f0202199752..00000000000 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ /dev/null @@ -1,157 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace -{ - -ASTPtr transformToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - return std::make_shared(Nested::concatenateName(name_in_storage, subcolumn_name)); -} - -ASTPtr transformEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("equals", ast, std::make_shared(0u)); -} - -ASTPtr transformNotEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = 
transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("notEquals", ast, std::make_shared(0u)); -} - -ASTPtr transformIsNotNullToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("not", ast); -} - -ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("sum", makeASTFunction("not", ast)); -} - -ASTPtr transformMapContainsToSubcolumn(const String & name_in_storage, const String & subcolumn_name, const ASTPtr & arg) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("has", ast, arg); -} - -const std::unordered_map> unary_function_to_subcolumn = -{ - {"length", {TypeIndex::Array, "size0", transformToSubcolumn}}, - {"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}}, - {"notEmpty", {TypeIndex::Array, "size0", transformNotEmptyToSubcolumn}}, - {"isNull", {TypeIndex::Nullable, "null", transformToSubcolumn}}, - {"isNotNull", {TypeIndex::Nullable, "null", transformIsNotNullToSubcolumn}}, - {"count", {TypeIndex::Nullable, "null", transformCountNullableToSubcolumn}}, - {"mapKeys", {TypeIndex::Map, "keys", transformToSubcolumn}}, - {"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}}, -}; - -const std::unordered_map> binary_function_to_subcolumn -{ - {"mapContains", {TypeIndex::Map, "keys", transformMapContainsToSubcolumn}}, -}; - -} - -void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) const -{ - const auto & arguments = function.arguments->children; - if (arguments.empty() || arguments.size() > 2) - return; - - const auto * identifier = arguments[0]->as(); - if (!identifier) - return; - - const auto & columns = metadata_snapshot->getColumns(); - const auto & name_in_storage = identifier->name(); - - if (!columns.has(name_in_storage)) - return; - - const auto & column_type = columns.get(name_in_storage).type; - TypeIndex column_type_id = column_type->getTypeId(); - const auto & alias = function.tryGetAlias(); - - if (arguments.size() == 1) - { - auto it = unary_function_to_subcolumn.find(function.name); - if (it != unary_function_to_subcolumn.end()) - { - const auto & [type_id, subcolumn_name, transformer] = it->second; - if (column_type_id == type_id) - { - ast = transformer(name_in_storage, subcolumn_name); - ast->setAlias(alias); - } - } - } - else - { - if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) - { - const auto * literal = arguments[1]->as(); - if (!literal) - return; - - String subcolumn_name; - auto value_type = literal->value.getType(); - if (value_type == Field::Types::UInt64) - { - const auto & type_tuple = assert_cast(*column_type); - auto index = literal->value.get(); - subcolumn_name = type_tuple.getNameByPosition(index); - } - else if (value_type == Field::Types::String) - subcolumn_name = literal->value.get(); - else - return; - - ast = transformToSubcolumn(name_in_storage, subcolumn_name); - ast->setAlias(alias); - } - else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant) - { - const auto * literal = arguments[1]->as(); - if (!literal) - return; - - String subcolumn_name; - auto value_type = literal->value.getType(); - if (value_type != Field::Types::String) - return; - - subcolumn_name = literal->value.get(); - ast = 
transformToSubcolumn(name_in_storage, subcolumn_name); - ast->setAlias(alias); - } - else - { - auto it = binary_function_to_subcolumn.find(function.name); - if (it != binary_function_to_subcolumn.end()) - { - const auto & [type_id, subcolumn_name, transformer] = it->second; - if (column_type_id == type_id) - { - ast = transformer(name_in_storage, subcolumn_name, arguments[1]); - ast->setAlias(alias); - } - } - } - } -} - -} diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h deleted file mode 100644 index 4d064bdee10..00000000000 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ASTFunction; - -/// Rewrites functions to subcolumns, if possible, to reduce amount of read data. -/// E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' -class RewriteFunctionToSubcolumnData -{ -public: - using TypeToVisit = ASTFunction; - void visit(ASTFunction & function, ASTPtr & ast) const; - - StorageMetadataPtr metadata_snapshot; -}; - -using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher; -using RewriteFunctionToSubcolumnVisitor = InDepthNodeVisitor; - -} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index c331c8640d6..b88d75cd5a2 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -564,12 +563,6 @@ void transformIfStringsIntoEnum(ASTPtr & query) ConvertStringsToEnumVisitor(convert_data).visit(query); } -void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot) -{ - RewriteFunctionToSubcolumnVisitor::Data data{metadata_snapshot}; - RewriteFunctionToSubcolumnVisitor(data).visit(query); -} - void optimizeOrLikeChain(ASTPtr & query) { ConvertFunctionOrLikeVisitor::Data data = {}; @@ -634,9 +627,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (!select_query) throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); - if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns()) - optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata); - /// Move arithmetic operations out of aggregation functions if (settings.optimize_arithmetic_operations_in_aggregate_functions) optimizeAggregationFunctions(query); diff --git a/src/Interpreters/getColumnFromBlock.cpp b/src/Interpreters/getColumnFromBlock.cpp index 972e109afb3..2e70a58b5a1 100644 --- a/src/Interpreters/getColumnFromBlock.cpp +++ b/src/Interpreters/getColumnFromBlock.cpp @@ -31,6 +31,36 @@ ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & req return castColumn({elem_column, elem_type, ""}, requested_column.type); } +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn) +{ + const auto * elem = block.findByName(requested_subcolumn.getNameInStorage()); + if (!elem) + return nullptr; + + auto subcolumn_name = requested_subcolumn.getSubcolumnName(); + /// If the requested subcolumn is dynamic, we should first perform the cast and then + /// extract the subcolumn, because the data of a dynamic subcolumn can change after the cast.
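+ /// For example (hypothetical column, for illustration only): for a column `data JSON` and a
+ /// requested subcolumn `data.a`, the internal variant that backs `data.a` may differ between
+ /// the source type and the requested type, so extracting the subcolumn before the cast could
+ /// return a column whose dynamic structure no longer matches the requested type.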
+ if (elem->type->hasDynamicSubcolumns() && !elem->type->equals(*requested_column_type)) + { + auto casted_column = castColumn({elem->column, elem->type, ""}, requested_column_type); + auto elem_column = requested_column_type->tryGetSubcolumn(subcolumn_name, casted_column); + auto elem_type = requested_column_type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return elem_column; + } + + auto elem_column = elem->type->tryGetSubcolumn(subcolumn_name, elem->column); + auto elem_type = elem->type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return castColumn({elem_column, elem_type, ""}, requested_subcolumn.type); +} + ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column) { auto result_column = tryGetColumnFromBlock(block, requested_column); diff --git a/src/Interpreters/getColumnFromBlock.h b/src/Interpreters/getColumnFromBlock.h index 26500cfdd17..737ce9db555 100644 --- a/src/Interpreters/getColumnFromBlock.h +++ b/src/Interpreters/getColumnFromBlock.h @@ -9,5 +9,6 @@ namespace DB ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn); } diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index 7ca27cf6584..88bb6b9ce76 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 2e7693b1b36..6dc0c021a14 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -30,12 +30,15 @@ public: std::shared_ptr> parallel_execution_index_, InitializerFunc initializer_func_ = {}) : ISource(storage_snapshot->getSampleBlockForColumns(column_names_)) - , column_names_and_types(storage_snapshot->getColumnsByNames( + , requested_column_names_and_types(storage_snapshot->getColumnsByNames( GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects(), column_names_)) , data(data_) , parallel_execution_index(parallel_execution_index_) , initializer_func(std::move(initializer_func_)) { + auto all_column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects()); + for (const auto & [name, type] : all_column_names_and_types) + all_names_to_types[name] = type; } String getName() const override { return "Memory"; } @@ -59,17 +62,20 @@ protected: const Block & src = (*data)[current_index]; Columns columns; - size_t num_columns = column_names_and_types.size(); + size_t num_columns = requested_column_names_and_types.size(); columns.reserve(num_columns); - auto name_and_type = column_names_and_types.begin(); + auto name_and_type = requested_column_names_and_types.begin(); for (size_t i = 0; i < num_columns; ++i) { - columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); + if (name_and_type->isSubcolumn()) + columns.emplace_back(tryGetSubcolumnFromBlock(src, all_names_to_types[name_and_type->getNameInStorage()], *name_and_type)); + else + columns.emplace_back(tryGetColumnFromBlock(src, 
*name_and_type)); ++name_and_type; } - fillMissingColumns(columns, src.rows(), column_names_and_types, column_names_and_types, {}, nullptr); + fillMissingColumns(columns, src.rows(), requested_column_names_and_types, requested_column_names_and_types, {}, nullptr); assert(std::all_of(columns.begin(), columns.end(), [](const auto & column) { return column != nullptr; })); return Chunk(std::move(columns), src.rows()); @@ -88,7 +94,9 @@ private: } } - const NamesAndTypesList column_names_and_types; + const NamesAndTypesList requested_column_names_and_types; + /// Map (name -> type) for all columns from the storage header. + std::unordered_map all_names_to_types; size_t execution_index = 0; std::shared_ptr data; std::shared_ptr> parallel_execution_index; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 8ecb36aab7f..b4ef8f04c53 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -409,7 +409,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); pipes.emplace_back(std::move(source)); } @@ -508,7 +508,7 @@ Pipe ReadFromMergeTree::readFromPool( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (i == 0) source->addTotalRowsApprox(total_rows); @@ -623,7 +623,7 @@ Pipe ReadFromMergeTree::readInOrder( processor->addPartLevelToChunk(isQueryWithFinal()); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (set_total_rows_approx) source->addTotalRowsApprox(total_rows); diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index b9f61d30182..a694fa43e46 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -17,6 +17,9 @@ #include #include +#include +#include + #include @@ -71,6 +74,9 @@ public: size_t function_index) const = 0; virtual std::optional getDefaultFrame() const { return {}; } + + /// Returns true if the frame type is supported by this function. + virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; } }; // Compares ORDER BY column values at given rows to find the boundaries of frame: @@ -402,6 +408,19 @@ WindowTransform::WindowTransform(const Block & input_header_, } } } + + for (const auto & workspace : workspaces) + { + if (workspace.window_function_impl) + { + if (!workspace.window_function_impl->checkWindowFrameType(this)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported window frame type for function '{}'", + workspace.aggregate_function->getName()); + } + } + + } } WindowTransform::~WindowTransform() @@ -1609,6 +1628,34 @@ struct WindowFunctionHelpers { recurrent_detail::setValueToOutputColumn(transform, function_index, value); } + + ALWAYS_INLINE static bool checkPartitionEnterFirstRow(const WindowTransform * transform) { return transform->current_row_number == 1; } + + ALWAYS_INLINE static bool checkPartitionEnterLastRow(const WindowTransform * transform) + { + /// Fast check first: if the partition end is not known yet, the current row cannot be the last one.
+ if (!transform->partition_ended) + return false; + + auto current_row = transform->current_row; + /// checkPartitionEnterLastRow is called for each row, so advance current_row.row here as well. + current_row.row++; + const auto & partition_end_row = transform->partition_end; + + /// The partition end is reached when one of the following is true: + /// - the current row is the partition end row, + /// - or the current row is the last row of the whole input. + if (current_row != partition_end_row) + { + /// When the current row is not the partition end row, we need to check whether it's the last + /// input row. + if (current_row.row < transform->blockRowsNumber(current_row)) + return false; + if (partition_end_row.block != current_row.block + 1 || partition_end_row.row) + return false; + } + return true; + } }; template @@ -2058,8 +2105,6 @@ namespace const WindowTransform * transform, size_t function_index, const DataTypes & argument_types); - - static void checkWindowFrameType(const WindowTransform * transform); }; } @@ -2080,6 +2125,29 @@ struct WindowFunctionNtile final : public StatefulWindowFunction bool allocatesMemoryInArena() const override { return false; } + bool checkWindowFrameType(const WindowTransform * transform) const override + { + if (transform->order_by_indices.empty()) + { + LOG_ERROR(getLogger("WindowFunctionNtile"), "Window frame for 'ntile' function must have an ORDER BY clause"); + return false; + } + + // We must wait for the partition end to get the total number of rows in the + // partition, so no block can be dropped before the end of this partition. + bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded + && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; + if (!is_frame_supported) + { + LOG_ERROR( + getLogger("WindowFunctionNtile"), + "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + return false; + } + return true; + } + std::optional getDefaultFrame() const override { WindowFrame frame; @@ -2106,7 +2174,6 @@ namespace { if (!buckets) [[unlikely]] { - checkWindowFrameType(transform); const auto & current_block = transform->blockAt(transform->current_row); const auto & workspace = transform->workspaces[function_index]; const auto & arg_col = *current_block.original_input_columns[workspace.argument_column_indices[0]]; @@ -2128,7 +2195,7 @@ namespace } } // new partition - if (transform->current_row_number == 1) [[unlikely]] + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) [[unlikely]] { current_partition_rows = 0; current_partition_inserted_row = 0; @@ -2137,25 +2204,9 @@ namespace current_partition_rows++; // Only do the action when we meet the last row in this partition. - if (!transform->partition_ended) + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) return; - else - { - auto current_row = transform->current_row; - current_row.row++; - const auto & end_row = transform->partition_end; - if (current_row != end_row) - { - if (current_row.row < transform->blockRowsNumber(current_row)) - return; - if (end_row.block != current_row.block + 1 || end_row.row) - { - return; - } - // else, current_row is the last input row.
- } - } auto bucket_capacity = current_partition_rows / buckets; auto capacity_diff = current_partition_rows - bucket_capacity * buckets; @@ -2193,23 +2244,115 @@ namespace bucket_num += 1; } } +} - void NtileState::checkWindowFrameType(const WindowTransform * transform) +namespace +{ +struct PercentRankState +{ + RowNumber start_row; + UInt64 current_partition_rows = 0; +}; +} + +struct WindowFunctionPercentRank final : public StatefulWindowFunction +{ +public: + WindowFunctionPercentRank(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + {} + + bool allocatesMemoryInArena() const override { return false; } + + bool checkWindowFrameType(const WindowTransform * transform) const override { - if (transform->order_by_indices.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause"); + if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE + || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded + || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Current) + { + LOG_ERROR( + getLogger("WindowFunctionPercentRank"), + "Window frame for function 'percent_rank' should be 'RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW'"); + return false; + } + return true; + } - // We must wait all for the partition end and get the total rows number in this - // partition. So before the end of this partition, there is no any block could be - // dropped out. - bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded - && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; - if (!is_frame_supported) + std::optional getDefaultFrame() const override + { + WindowFrame frame; + frame.type = WindowFrame::FrameType::RANGE; + frame.begin_type = WindowFrame::BoundaryType::Unbounded; + frame.end_type = WindowFrame::BoundaryType::Current; + return frame; + } + + void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override + { + auto & state = getWorkspaceState(transform, function_index); + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + state.current_partition_rows = 0; + state.start_row = transform->current_row; + } + + insertRankIntoColumn(transform, function_index); + state.current_partition_rows++; + + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) + { + return; + } + + UInt64 remaining_rows = state.current_partition_rows; + Float64 percent_rank_denominator = remaining_rows == 1 ? 1 : remaining_rows - 1; + + while (remaining_rows > 0) + { + auto block_rows_number = transform->blockRowsNumber(state.start_row); + auto available_block_rows = block_rows_number - state.start_row.row; + if (available_block_rows <= remaining_rows) + { + /// This partition involves multiple blocks. Finish the current block and move on to the + /// next block.
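+ /// At this point every output cell of the partition already holds the row's rank
+ /// (peer_group_start_row_number, pushed by insertRankIntoColumn below), so the loops here
+ /// only rescale it: percent_rank = (rank - 1) / (rows_in_partition - 1). The denominator
+ /// was clamped to 1 above, so a single-row partition yields 0 instead of dividing by zero.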
+ auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; + auto & data = assert_cast(to_column).getData(); + for (size_t i = state.start_row.row; i < block_rows_number; ++i) + data[i] = (data[i] - 1) / percent_rank_denominator; + + state.start_row.block++; + state.start_row.row = 0; + remaining_rows -= available_block_rows; + } + else + { + /// The partition ends in the current block. + auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; + auto & data = assert_cast(to_column).getData(); + for (size_t i = state.start_row.row, n = state.start_row.row + remaining_rows; i < n; ++i) + { + data[i] = (data[i] - 1) / percent_rank_denominator; + } + state.start_row.row += remaining_rows; + remaining_rows = 0; + } } } -} + + + inline PercentRankState & getWorkspaceState(const WindowTransform * transform, size_t function_index) const + { + const auto & workspace = transform->workspaces[function_index]; + return getState(workspace); + } + + inline void insertRankIntoColumn(const WindowTransform * transform, size_t function_index) const + { + auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index]; + assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number)); + } +}; // ClickHouse-specific variant of lag/lead that respects the window frame. template @@ -2582,6 +2725,13 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) parameters); }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("percent_rank", {[](const std::string & name, + const DataTypes & argument_types, const Array & parameters, const Settings *) + { + return std::make_shared(name, argument_types, + parameters); + }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("row_number", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 10b59751b22..cb36df1efc0 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1735,10 +1735,19 @@ namespace class GRPCServer::Runner { public: - explicit Runner(GRPCServer & owner_) : owner(owner_) {} + explicit Runner(GRPCServer & owner_) : owner(owner_), log(owner.log) {} ~Runner() { + try + { + stop(); + } + catch (...) + { + tryLogCurrentException(log, "~Runner"); + } + if (queue_thread.joinable()) queue_thread.join(); } @@ -1756,13 +1765,27 @@ public: } catch (...) { - tryLogCurrentException("GRPCServer"); + tryLogCurrentException(log, "run"); } }; queue_thread = ThreadFromGlobalPool{runner_function}; } - void stop() { stopReceivingNewCalls(); } + void stop() + { + std::lock_guard lock{mutex}; + should_stop = true; + + if (current_calls.empty()) + { + /// If there are no current calls then we call shutdownQueue() to signal the queue to stop waiting for next events. + /// The following line will make CompletionQueue::Next() stop waiting if the queue is empty and return false instead. + shutdownQueue(); + + /// If there are some current calls then we can't call shutdownQueue() right now because we want to let the current calls finish. + /// In this case, shutdownQueue() will be called later in run().
+ } size_t getNumCurrentCalls() const { @@ -1789,12 +1812,6 @@ private: [this, call_type](bool ok) { onNewCall(call_type, ok); }); } - void stopReceivingNewCalls() - { - std::lock_guard lock{mutex}; - should_stop = true; - } - void onNewCall(CallType call_type, bool responder_started_ok) { std::lock_guard lock{mutex}; @@ -1827,38 +1844,47 @@ private: void run() { setThreadName("GRPCServerQueue"); - while (true) + + bool ok = false; + void * tag = nullptr; + + while (owner.queue->Next(&tag, &ok)) { - { - std::lock_guard lock{mutex}; - finished_calls.clear(); /// Destroy finished calls. - - /// If (should_stop == true) we continue processing until there is no active calls. - if (should_stop && current_calls.empty()) - { - bool all_responders_gone = std::all_of( - responders_for_new_calls.begin(), responders_for_new_calls.end(), - [](std::unique_ptr & responder) { return !responder; }); - if (all_responders_gone) - break; - } - } - - bool ok = false; - void * tag = nullptr; - if (!owner.queue->Next(&tag, &ok)) - { - /// Queue shutted down. - break; - } - auto & callback = *static_cast(tag); callback(ok); + + std::lock_guard lock{mutex}; + finished_calls.clear(); /// Destroy finished calls. + + /// If (should_stop == true) we continue processing while there are current calls. + if (should_stop && current_calls.empty()) + shutdownQueue(); } + + /// CompletionQueue::Next() returns false if the queue is fully drained and shut down. + } + + /// Shut down the queue if that hasn't been done yet. + void shutdownQueue() + { + chassert(should_stop); + if (queue_is_shut_down) + return; + + queue_is_shut_down = true; + + /// Server should be shut down before CompletionQueue. + if (owner.grpc_server) + owner.grpc_server->Shutdown(); + + if (owner.queue) + owner.queue->Shutdown(); } GRPCServer & owner; + LoggerRawPtr log; ThreadFromGlobalPool queue_thread; + bool queue_is_shut_down = false; std::vector> responders_for_new_calls; std::map> current_calls; std::vector> finished_calls; @@ -1876,16 +1902,6 @@ GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & addr GRPCServer::~GRPCServer() { - /// Server should be shutdown before CompletionQueue. - if (grpc_server) - grpc_server->Shutdown(); - - /// Completion Queue should be shutdown before destroying the runner, - /// because the runner is now probably executing CompletionQueue::Next() on queue_thread - /// which is blocked until an event is available or the queue is shutting down. - if (queue) - queue->Shutdown(); - runner.reset(); } diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index dff960f7031..1f3e038a1f5 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -18,9 +18,6 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe { try { - /// Raw config reference is used here to avoid dependency on Context and ServerSettings. - /// This is painful, because this class is also used in a build with CLICKHOUSE_KEEPER_STANDALONE_BUILD=1 - /// And there ordinary Context is replaced with a tiny clone.
const auto & config = server.config(); unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); diff --git a/src/Server/ProtocolServerAdapter.cpp b/src/Server/ProtocolServerAdapter.cpp index 8d14a849894..b41ad2376f1 100644 --- a/src/Server/ProtocolServerAdapter.cpp +++ b/src/Server/ProtocolServerAdapter.cpp @@ -1,7 +1,7 @@ #include #include -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC #include #endif @@ -37,7 +37,7 @@ ProtocolServerAdapter::ProtocolServerAdapter( { } -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC class ProtocolServerAdapter::GRPCServerAdapterImpl : public Impl { public: diff --git a/src/Server/ProtocolServerAdapter.h b/src/Server/ProtocolServerAdapter.h index dd11c1dfc58..76a6776ed9c 100644 --- a/src/Server/ProtocolServerAdapter.h +++ b/src/Server/ProtocolServerAdapter.h @@ -23,7 +23,7 @@ public: ProtocolServerAdapter & operator =(ProtocolServerAdapter && src) = default; ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr tcp_server_); -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr grpc_server_); #endif diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a522a3f8782..ac1423f87c1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1875,7 +1875,7 @@ void TCPHandler::receiveQuery() #endif } - query_context = session->makeQueryContext(std::move(client_info)); + query_context = session->makeQueryContext(client_info); /// Sets the default database if it wasn't set earlier for the session context. if (is_interserver_mode && !default_database.empty()) @@ -1890,6 +1890,16 @@ void TCPHandler::receiveQuery() /// /// Settings /// + + /// FIXME: Remove when allow_experimental_analyzer becomes obsolete. + /// Analyzer became Beta in 24.3 and started to be enabled by default. + /// We have to disable it for ourselves to make sure we don't have different settings on + /// different servers. + if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY + && client_info.getVersionNumber() < VersionNumber(23, 3, 0) + && !passed_settings.allow_experimental_analyzer.changed) + passed_settings.set("allow_experimental_analyzer", false); + auto settings_changes = passed_settings.changes(); query_kind = query_context->getClientInfo().query_kind; if (query_kind == ClientInfo::QueryKind::INITIAL_QUERY) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 98afd844046..6217470780d 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -165,6 +165,8 @@ public: /// Returns true if the storage supports reading of subcolumns of complex types. virtual bool supportsSubcolumns() const { return false; } + /// Returns true if the storage supports optimizing functions by reading subcolumns. + virtual bool supportsOptimizationToSubcolumns() const { return supportsSubcolumns(); } /// Returns true if the storage supports transactions for SELECT, INSERT and ALTER queries. /// Storage may throw an exception later if some query kind is not fully supported.
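A minimal sketch of how the new hook is meant to be used (StorageExample is hypothetical; the overrides mirror what IStorageCluster does in the next hunk): a storage can keep serving direct subcolumn reads while opting out of the functions-to-subcolumns rewrite.

class StorageExample : public DB::IStorage
{
public:
    using DB::IStorage::IStorage;

    /// Subcolumns can still be read directly from this storage...
    bool supportsSubcolumns() const override { return true; }

    /// ...but the optimizer must not rewrite functions such as length(arr) into reads of
    /// arr.size0 here, e.g. when queries are forwarded to remote nodes whose schema the
    /// initiator cannot inspect at rewrite time.
    bool supportsOptimizationToSubcolumns() const override { return false; }

    /// (Other pure-virtual members of IStorage are omitted for brevity.)
};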
diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index f3283247672..893cf222556 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -37,7 +37,10 @@ public: QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; - bool isRemote() const override { return true; } + bool isRemote() const final { return true; } + bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } protected: virtual void updateBeforeRead(const ContextPtr &) {} diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index bdea46a8210..c2e0e778220 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1313,6 +1313,17 @@ void IMergeTreeDataPart::loadRowsCount() auto buf = metadata_manager->read("count.txt"); readIntText(rows_count, *buf); assertEOF(*buf); + + if (!index_granularity.empty() && rows_count < index_granularity.getTotalRows() && index_granularity_info.fixed_index_granularity) + { + /// Adjust the last granule size to match the number of rows in the part in case of fixed index_granularity. + index_granularity.popMark(); + index_granularity.appendMark(rows_count % index_granularity_info.fixed_index_granularity); + if (rows_count != index_granularity.getTotalRows()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Index granularity total rows in part {} does not match rows_count: {}, instead of {}", + name, index_granularity.getTotalRows(), rows_count); + } }; if (index_granularity.empty()) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 6152da78395..c87f66b64f3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -71,9 +72,21 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( Columns IMergeTreeDataPartWriter::releaseIndexColumns() { - return Columns( - std::make_move_iterator(index_columns.begin()), - std::make_move_iterator(index_columns.end())); + /// The memory for the index was allocated without the thread memory tracker. + /// We need to deallocate it in shrinkToFit without the memory tracker as well.
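+ /// MemoryTrackerBlockerInThread is a scoped (RAII) blocker: while the object below is
+ /// alive, allocations and deallocations on this thread bypass the thread's memory tracker,
+ /// so the shrinkToFit() calls that follow are not attributed to whichever query happens to
+ /// be running when the index columns are released.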
+ MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + + Columns result; + result.reserve(index_columns.size()); + + for (auto & column : index_columns) + { + column->shrinkToFit(); + result.push_back(std::move(column)); + } + + index_columns.clear(); + return result; } SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const diff --git a/src/Storages/MergeTree/IPartMetadataManager.h b/src/Storages/MergeTree/IPartMetadataManager.h index cef1d10e4ad..e817421f7d0 100644 --- a/src/Storages/MergeTree/IPartMetadataManager.h +++ b/src/Storages/MergeTree/IPartMetadataManager.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 7ab8fa2430a..c8f1a08128b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -555,18 +555,18 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const if (!reread_buf) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read temporary file {}", ctx->rows_sources_uncompressed_write_buf->getFileName()); - auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); + auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); if (!reread_buffer_raw) { const auto & reread_buf_ref = *reread_buf; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFile, but got {}", demangle(typeid(reread_buf_ref).name())); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFileBase, but got {}", demangle(typeid(reread_buf_ref).name())); } /// Move ownership from std::unique_ptr<ReadBuffer> to std::unique_ptr<ReadBufferFromFileBase> for CompressedReadBufferFromFile. /// First, release ownership from unique_ptr to base type. reread_buf.release(); /// NOLINT(bugprone-unused-return-value,hicpp-ignored-remove-result): we already have the pointer value in `reread_buffer_raw` /// Then, move ownership to unique_ptr to concrete type. - std::unique_ptr reread_buffer_from_file(reread_buffer_raw); + std::unique_ptr reread_buffer_from_file(reread_buffer_raw); /// CompressedReadBufferFromFile expects std::unique_ptr<ReadBufferFromFileBase> as argument.
ctx->rows_sources_read_buf = std::make_unique(std::move(reread_buffer_from_file)); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index a576720294f..5c9191dbb54 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -254,6 +254,12 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); } + + const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types; + index_serializations.reserve(primary_key_types.size()); + + for (const auto & type : primary_key_types) + index_serializations.push_back(type->getDefaultSerialization()); } } @@ -299,22 +305,33 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } + skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); skip_index_accumulated_marks.push_back(0); } } +void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row) +{ + chassert(index_block.columns() == index_serializations.size()); + auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream; + + for (size_t i = 0; i < index_block.columns(); ++i) + { + const auto & column = index_block.getByPosition(i).column; + + index_columns[i]->insertFrom(*column, row); + index_serializations[i]->serializeBinary(*column, row, index_stream, {}); + } +} + void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write) { - size_t primary_columns_num = primary_index_block.columns(); + if (!metadata_snapshot->hasPrimaryKey()) + return; + if (index_columns.empty()) - { - index_types = primary_index_block.getDataTypes(); - index_columns.resize(primary_columns_num); - last_block_index_columns.resize(primary_columns_num); - for (size_t i = 0; i < primary_columns_num; ++i) - index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); - } + index_columns = primary_index_block.cloneEmptyColumns(); { /** While filling index (index_columns), disable memory tracker. @@ -328,22 +345,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc /// Write index. The index contains Primary Key value for each `index_granularity` row. for (const auto & granule : granules_to_write) { - if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) - { - for (size_t j = 0; j < primary_columns_num; ++j) - { - const auto & primary_column = primary_index_block.getByPosition(j); - index_columns[j]->insertFrom(*primary_column.column, granule.start_row); - primary_column.type->getDefaultSerialization()->serializeBinary( - *primary_column.column, granule.start_row, compress_primary_key ? 
*index_source_hashing_stream : *index_file_hashing_stream, {}); - } - } + if (granule.mark_on_start) + calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row); } } - /// store last index row to write final mark at the end of column - for (size_t j = 0; j < primary_columns_num; ++j) - last_block_index_columns[j] = primary_index_block.getByPosition(j).column; + /// Store block with last index row to write final mark at the end of column + if (with_final_mark) + last_index_block = primary_index_block; } void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) @@ -420,17 +429,11 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat if (index_file_hashing_stream) { - if (write_final_mark) + if (write_final_mark && last_index_block) { - for (size_t j = 0; j < index_columns.size(); ++j) - { - const auto & column = *last_block_index_columns[j]; - size_t last_row_number = column.size() - 1; - index_columns[j]->insertFrom(column, last_row_number); - index_types[j]->getDefaultSerialization()->serializeBinary( - column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); - } - last_block_index_columns.clear(); + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1); + last_index_block.clear(); } if (compress_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index bdf0fdb7f32..8d84442981e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -173,10 +173,10 @@ protected: std::unique_ptr index_source_hashing_stream; bool compress_primary_key; - DataTypes index_types; - /// Index columns from the last block - /// It's written to index file in the `writeSuffixAndFinalizePart` method - Columns last_block_index_columns; + /// Last block with index columns. + /// It's written to index file in the `writeSuffixAndFinalizePart` method. 
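+ /// (Keeping a whole Block here, rather than detached Columns, lets fillPrimaryIndexChecksums serialize the final-mark row through the same calculateAndSerializePrimaryIndexRow path as ordinary marks.)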
+ Block last_index_block; + Serializations index_serializations; bool data_written = false; @@ -193,6 +193,7 @@ private: void initStatistics(); virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; + void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row); struct ExecutionStatistics { diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 5ba326cef0c..a69d21de8e7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -558,7 +558,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePair if (index_granularity_rows != index_granularity.getMarkRows(mark_num)) { - throw Exception( + /// With fixed granularity the last mark can have fewer rows than the granularity + const bool is_last_mark = (mark_num + 1 == index_granularity.getMarksCount()); + if (!index_granularity_info.fixed_index_granularity || !is_last_mark) + throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", @@ -821,7 +824,14 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_rows_in_last_mark) /// Without offset rows_written_in_last_mark = 0; } + + if (compute_granularity) + { + index_granularity.popMark(); + index_granularity.appendMark(new_rows_in_last_mark); + } } + } } diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index fcf2dd76e3f..e323b9f9ee7 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -133,9 +133,8 @@ private: }; #endif -MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_) - : ISource(processor_->getHeader()) - , processor(std::move(processor_)) +MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_) + : ISource(processor_->getHeader()), processor(std::move(processor_)), log_name(log_name_) { #if defined(OS_LINUX) if (processor->getSettings().use_asynchronous_read_from_pool) @@ -207,7 +206,7 @@ std::optional<Chunk> MergeTreeSource::tryGenerate() try { - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; holder->setResult(processor->read()); } catch (...)
@@ -222,7 +221,7 @@ std::optional MergeTreeSource::tryGenerate() } #endif - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; return processReadResult(processor->read()); } diff --git a/src/Storages/MergeTree/MergeTreeSource.h b/src/Storages/MergeTree/MergeTreeSource.h index 655f0ee6ebe..fc39b4f9b09 100644 --- a/src/Storages/MergeTree/MergeTreeSource.h +++ b/src/Storages/MergeTree/MergeTreeSource.h @@ -12,7 +12,7 @@ struct ChunkAndProgress; class MergeTreeSource final : public ISource { public: - explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_); + explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_); ~MergeTreeSource() override; std::string getName() const override; @@ -30,6 +30,7 @@ protected: private: MergeTreeSelectProcessorPtr processor; + const std::string log_name; #if defined(OS_LINUX) struct AsyncReadingState; diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index f763a997bfb..91cc02de0f0 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -1,4 +1,5 @@ #include +#include #if USE_AZURE_BLOB_STORAGE @@ -40,72 +41,19 @@ const std::unordered_set optional_configuration_keys = { "storage_account_url", }; -using AzureClient = Azure::Storage::Blobs::BlobContainerClient; -using AzureClientPtr = std::unique_ptr; - -namespace -{ - bool isConnectionString(const std::string & candidate) - { - return !candidate.starts_with("http"); - } - - template - bool containerExists(T & blob_service_client, const std::string & container_name) - { - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client.ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - auto it = std::find_if( - containers_list.begin(), containers_list.end(), - [&](const auto & c) { return c.Name == container_name; }); - return it != containers_list.end(); - } -} - -Poco::URI StorageAzureConfiguration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - void StorageAzureConfiguration::check(ContextPtr context) const { - context->getGlobalContext()->getRemoteHostFilter().checkURL(getConnectionURL()); + auto url = Poco::URI(connection_params.getConnectionURL()); + context->getGlobalContext()->getRemoteHostFilter().checkURL(url); Configuration::check(context); } StorageAzureConfiguration::StorageAzureConfiguration(const StorageAzureConfiguration & other) : Configuration(other) { - connection_url = other.connection_url; - is_connection_string = other.is_connection_string; - account_name = other.account_name; - account_key = other.account_key; - container = other.container; blob_path = other.blob_path; blobs_paths = other.blobs_paths; -} - -AzureObjectStorage::SettingsPtr StorageAzureConfiguration::createSettings(ContextPtr context) -{ - const auto & context_settings = context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - 
settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - settings_ptr->strict_upload_part_size = context_settings.azure_strict_upload_part_size; - settings_ptr->max_upload_part_size = context_settings.azure_max_upload_part_size; - settings_ptr->max_blocks_in_multipart_upload = context_settings.azure_max_blocks_in_multipart_upload; - settings_ptr->min_upload_part_size = context_settings.azure_min_upload_part_size; - return settings_ptr; + connection_params = other.connection_params; } StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings(const ContextPtr & context) const @@ -126,174 +74,59 @@ StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings( ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { assertInitialized(); - auto client = createClient(is_readonly, /* attempt_to_create_container */true); - auto settings = createSettings(context); + + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + auto client = AzureBlobStorage::getContainerClient(connection_params, is_readonly); + return std::make_unique( - "AzureBlobStorage", std::move(client), std::move(settings), container, getConnectionURL().toString()); + "AzureBlobStorage", + connection_params.createForContainer(), + std::move(settings), + connection_params.getContainer(), + connection_params.getConnectionURL()); } -AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) +static AzureBlobStorage::ConnectionParams getConnectionParams( + const String & connection_url, + const String & container_name, + const std::optional & account_name, + const std::optional & account_key, + const ContextPtr & local_context) { - using namespace Azure::Storage::Blobs; + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(local_context->getSettingsRef()); - AzureClientPtr result; - - if (is_connection_string) + if (account_name && account_key) { - auto managed_identity_credential = std::make_shared(); - auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); - - if (attempt_to_create_container) - { - bool container_exists = containerExists(*blob_service_client, container); - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "AzureBlobStorage container does not exist '{}'", - container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } + connection_params.endpoint.storage_account_url = connection_url; + connection_params.endpoint.container_name = container_name; + connection_params.auth_method = std::make_shared(*account_name, *account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); } else { - std::shared_ptr storage_shared_key_credential; - if (account_name.has_value() && account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*account_name, *account_key); - 
} - - std::unique_ptr blob_service_client; - size_t pos = connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(connection_url, workload_identity_credential); - } - else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(connection_url, managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + container - + connection_url.substr(pos); - } - else - final_url - = connection_url + (connection_url.back() == '/' ? "" : "/") + container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(*blob_service_client, container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "AzureBlobStorage container does not exist '{}'", - container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(container).Value); - } catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); } - return result; + return connection_params; } - void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection, ContextPtr) +void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection, ContextPtr context) { validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + String connection_url; + String container_name; + std::optional account_name; + std::optional account_key; + if (collection.has("connection_string")) - { connection_url = collection.get("connection_string"); - is_connection_string = true; - } - - if 
(collection.has("storage_account_url")) - { + else if (collection.has("storage_account_url")) connection_url = collection.get("storage_account_url"); - is_connection_string = false; - } - container = collection.get("container"); + container_name = collection.get("container"); blob_path = collection.get("blob_path"); if (collection.has("account_name")) @@ -307,6 +140,7 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); blobs_paths = {blob_path}; + connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, context); } void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) @@ -324,12 +158,14 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, std::unordered_map engine_args_to_idx; - connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - is_connection_string = isConnectionString(connection_url); - container = checkAndGetLiteralArgument(engine_args[1], "container"); + String connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + String container_name = checkAndGetLiteralArgument(engine_args[1], "container"); blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + std::optional account_name; + std::optional account_key; + auto is_format_arg = [] (const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(Poco::toLower(s)); @@ -386,7 +222,9 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (is_format_arg(sixth_arg)) + { format = sixth_arg; + } else { if (with_structure) @@ -428,6 +266,7 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, } blobs_paths = {blob_path}; + connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, context); } void StorageAzureConfiguration::addStructureAndFormatToArgs( diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index bbaa82c51ba..4e6bfbc0745 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -35,8 +35,8 @@ public: const Paths & getPaths() const override { return blobs_paths; } void setPaths(const Paths & paths) override { blobs_paths = paths; } - String getNamespace() const override { return container; } - String getDataSourceDescription() const override { return std::filesystem::path(connection_url) / container; } + String getNamespace() const override { return connection_params.getContainer(); } + String getDataSourceDescription() const override { return std::filesystem::path(connection_params.getConnectionURL()) / connection_params.getContainer(); } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; @@ -54,22 +54,9 @@ protected: void fromNamedCollection(const NamedCollection & collection, ContextPtr context) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; - using AzureClient = Azure::Storage::Blobs::BlobContainerClient; - using 
AzureClientPtr = std::unique_ptr; - - std::string connection_url; - bool is_connection_string; - - std::optional account_name; - std::optional account_key; - - std::string container; std::string blob_path; std::vector blobs_paths; - - AzureClientPtr createClient(bool is_read_only, bool attempt_to_create_container); - AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); - Poco::URI getConnectionURL() const; + AzureBlobStorage::ConnectionParams connection_params; }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 69fec2b3c77..108aa109616 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -24,12 +24,6 @@ public: std::string getName() const override; - bool supportsSubcolumns() const override { return true; } - - bool supportsDynamicSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - RemoteQueryExecutor::Extension getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & context) const override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 6bded90f11d..6940f10cb91 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -65,7 +65,6 @@ StorageObjectStorageSource::StorageObjectStorageSource( CurrentMetrics::StorageObjectStorageThreadsActive, CurrentMetrics::StorageObjectStorageThreadsScheduled, 1/* max_threads */)) - , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) @@ -156,20 +155,20 @@ std::shared_ptr StorageObjectStorageSourc return iterator; } -void StorageObjectStorageSource::lazyInitialize(size_t processor) +void StorageObjectStorageSource::lazyInitialize() { if (initialized) return; - reader = createReader(processor); + reader = createReader(); if (reader) - reader_future = createReaderAsync(processor); + reader_future = createReaderAsync(); initialized = true; } Chunk StorageObjectStorageSource::generate() { - lazyInitialize(0); + lazyInitialize(); while (true) { @@ -196,13 +195,12 @@ Chunk StorageObjectStorageSource::generate() const auto & filename = object_info->getFileName(); chassert(object_info->metadata); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( - chunk, read_from_format_info.requested_virtual_columns, - { - .path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), - .size = object_info->metadata->size_bytes, - .filename = &filename, - .last_modified = object_info->metadata->last_modified - }); + chunk, + read_from_format_info.requested_virtual_columns, + {.path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), + .size = object_info->isArchive() ? 
object_info->fileSizeInArchive() : object_info->metadata->size_bytes, + .filename = &filename, + .last_modified = object_info->metadata->last_modified}); const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) @@ -227,7 +225,6 @@ Chunk StorageObjectStorageSource::generate() chunk.addColumn(std::move(partition_column)); } } - return chunk; } @@ -261,27 +258,30 @@ void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_inf schema_cache.addNumRows(cache_key, num_rows); } -std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader() { - const auto cache_key = getKeyForSchemaCache( - getUniqueStoragePathIdentifier(*configuration, object_info), - configuration->format, - format_settings, - getContext()); - - auto get_last_mod_time = [&]() -> std::optional - { - return object_info.metadata - ? std::optional(object_info.metadata->last_modified.epochTime()) - : std::nullopt; - }; - return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); + return createReader( + 0, file_iterator, configuration, object_storage, read_from_format_info, format_settings, + key_condition, getContext(), &schema_cache, log, max_block_size, max_parsing_threads, need_only_count); } -StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader( + size_t processor, + const std::shared_ptr & file_iterator, + const ConfigurationPtr & configuration, + const ObjectStoragePtr & object_storage, + const ReadFromFormatInfo & read_from_format_info, + const std::optional & format_settings, + const std::shared_ptr & key_condition_, + const ContextPtr & context_, + SchemaCache * schema_cache, + const LoggerPtr & log, + size_t max_block_size, + size_t max_parsing_threads, + bool need_only_count) { ObjectInfoPtr object_info; - auto query_settings = configuration->getQuerySettings(getContext()); + auto query_settings = configuration->getQuerySettings(context_); do { @@ -302,9 +302,29 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade std::shared_ptr source; std::unique_ptr read_buf; + auto try_get_num_rows_from_cache = [&]() -> std::optional + { + if (!schema_cache) + return std::nullopt; + + const auto cache_key = getKeyForSchemaCache( + getUniqueStoragePathIdentifier(*configuration, *object_info), + configuration->format, + format_settings, + context_); + + auto get_last_mod_time = [&]() -> std::optional + { + return object_info->metadata + ? std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; + }; + return schema_cache->tryGetNumRows(cache_key, get_last_mod_time); + }; + std::optional num_rows_from_cache = need_only_count - && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(*object_info) + && context_->getSettingsRef().use_cache_for_count_from_files + ? 
try_get_num_rows_from_cache() : std::nullopt; if (num_rows_from_cache) @@ -329,14 +349,14 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade else { compression_method = chooseCompressionMethod(object_info->getFileName(), configuration->compression_method); - read_buf = createReadBuffer(*object_info); + read_buf = createReadBuffer(*object_info, object_storage, context_, log); } auto input_format = FormatFactory::instance().getInput( configuration->format, *read_buf, read_from_format_info.format_header, - getContext(), + context_, max_block_size, format_settings, need_only_count ? 1 : max_parsing_threads, @@ -345,20 +365,20 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade compression_method, need_only_count); - if (key_condition) - input_format->setKeyCondition(key_condition); + if (key_condition_) + input_format->setKeyCondition(key_condition_); if (need_only_count) input_format->needOnlyCount(); builder.init(Pipe(input_format)); - if (columns_desc.hasDefaults()) + if (read_from_format_info.columns_description.hasDefaults()) { builder.addSimpleTransform( [&](const Block & header) { - return std::make_shared(header, columns_desc, *input_format, getContext()); + return std::make_shared(header, read_from_format_info.columns_description, *input_format, context_); }); } @@ -381,21 +401,25 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade object_info, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)); } -std::future StorageObjectStorageSource::createReaderAsync(size_t processor) +std::future StorageObjectStorageSource::createReaderAsync() { - return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); + return create_reader_scheduler([=, this] { return createReader(); }, Priority{}); } -std::unique_ptr StorageObjectStorageSource::createReadBuffer(const ObjectInfo & object_info) +std::unique_ptr StorageObjectStorageSource::createReadBuffer( + const ObjectInfo & object_info, + const ObjectStoragePtr & object_storage, + const ContextPtr & context_, + const LoggerPtr & log) { const auto & object_size = object_info.metadata->size_bytes; - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + auto read_settings = context_->getReadSettings().adjustBufferSize(object_size); read_settings.enable_filesystem_cache = false; /// FIXME: Changing this setting to default value breaks something around parquet reading read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; - const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size; + const bool object_too_small = object_size <= 2 * context_->getSettings().max_download_buffer_size; const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; /// User's object may change, don't cache it. 
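The buffer setup above boils down to a small heuristic: read through the threadpool method with eager prefetch only when the whole object fits within twice max_download_buffer_size, and fall back to plain reads otherwise. A minimal sketch of just that decision (the free-function form and its name are illustrative, not the actual API):

    /// Sketch only: mirrors the use_prefetch heuristic in createReadBuffer above.
    static bool usePrefetch(size_t object_size, size_t max_download_buffer_size, bool threadpool_method_configured)
    {
        const bool object_too_small = object_size <= 2 * max_download_buffer_size;
        return object_too_small && threadpool_method_configured;
    }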
@@ -715,10 +739,9 @@ static IArchiveReader::NameFilter createArchivePathFilter(const std::string & ar StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive::ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_) - : archive_object(archive_object_) - , path_in_archive(path_in_archive_) - , archive_reader(archive_reader_) + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_) + : archive_object(archive_object_), path_in_archive(path_in_archive_), archive_reader(archive_reader_), file_info(file_info_) { } @@ -757,6 +780,7 @@ StorageObjectStorageSource::ObjectInfoPtr StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) { std::unique_lock lock{next_mutex}; + IArchiveReader::FileInfo current_file_info{}; while (true) { if (filter) @@ -781,6 +805,8 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) path_in_archive = file_enumerator->getFileName(); if (!filter(path_in_archive)) continue; + else + current_file_info = file_enumerator->getFileInfo(); } else { @@ -794,15 +820,19 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) archive_reader = createArchiveReader(archive_object); if (!archive_reader->fileExists(path_in_archive)) continue; + else + current_file_info = archive_reader->getFileInfo(path_in_archive); } - - auto object_in_archive = std::make_shared(archive_object, path_in_archive, archive_reader); - - if (read_keys != nullptr) - read_keys->push_back(object_in_archive); - - return object_in_archive; + break; } + + auto object_in_archive + = std::make_shared(archive_object, path_in_archive, archive_reader, std::move(current_file_info)); + + if (read_keys != nullptr) + read_keys->push_back(object_in_archive); + + return object_in_archive; } size_t StorageObjectStorageSource::ArchiveIterator::estimatedKeysCount() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 094a1420894..271b38fa75c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -76,7 +76,6 @@ protected: const ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; - ColumnsDescription columns_desc; std::shared_ptr file_iterator; SchemaCache & schema_cache; bool initialized = false; @@ -117,13 +116,32 @@ protected: std::future reader_future; /// Recreate ReadBuffer and Pipeline for each file. 
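+ /// (The static overload below lets ObjectStorageQueueSource build readers directly from its own iterator, configuration and context instead of owning an internal StorageObjectStorageSource; see ObjectStorageQueueSource::generateImpl in this patch.)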
- ReaderHolder createReader(size_t processor = 0); - std::future createReaderAsync(size_t processor = 0); - std::unique_ptr createReadBuffer(const ObjectInfo & object_info); + static ReaderHolder createReader( + size_t processor, + const std::shared_ptr & file_iterator, + const ConfigurationPtr & configuration, + const ObjectStoragePtr & object_storage, + const ReadFromFormatInfo & read_from_format_info, + const std::optional & format_settings, + const std::shared_ptr & key_condition_, + const ContextPtr & context_, + SchemaCache * schema_cache, + const LoggerPtr & log, + size_t max_block_size, + size_t max_parsing_threads, + bool need_only_count); + + ReaderHolder createReader(); + + std::future createReaderAsync(); + static std::unique_ptr createReadBuffer( + const ObjectInfo & object_info, + const ObjectStoragePtr & object_storage, + const ContextPtr & context_, + const LoggerPtr & log); void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows); - std::optional tryGetNumRowsFromCache(const ObjectInfo & object_info); - void lazyInitialize(size_t processor); + void lazyInitialize(); }; class StorageObjectStorageSource::IIterator @@ -260,7 +278,8 @@ public: ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_); + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_); std::string getFileName() const override { @@ -279,9 +298,12 @@ public: bool isArchive() const override { return true; } + size_t fileSizeInArchive() const override { return file_info.uncompressed_size; } + const ObjectInfoPtr archive_object; const std::string path_in_archive; const std::shared_ptr archive_reader; + const IArchiveReader::FileInfo file_info; }; private: diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp index 52ee0c9f8ed..6fac519849d 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp @@ -62,6 +62,11 @@ void ObjectStorageQueueIFileMetadata::FileStatus::onFailed(const std::string & e last_exception = exception; } +void ObjectStorageQueueIFileMetadata::FileStatus::updateState(State state_) +{ + state = state_; +} + std::string ObjectStorageQueueIFileMetadata::FileStatus::getException() const { std::lock_guard lock(last_exception_mutex); @@ -224,9 +229,14 @@ bool ObjectStorageQueueIFileMetadata::setProcessing() auto [success, file_state] = setProcessingImpl(); if (success) + { file_status->onProcessing(); + } else + { + LOG_TEST(log, "Updating state of {} from {} to {}", path, file_status->state.load(), file_state); file_status->updateState(file_state); + } LOG_TEST(log, "File {} has state `{}`: will {}process (processing id version: {})", path, file_state, success ? 
"" : "not ", diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h index f0e55c202a2..9bab9e7f075 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h @@ -23,7 +23,7 @@ public: void onProcessing(); void onProcessed(); void onFailed(const std::string & exception); - void updateState(State state_) { state = state_; } + void updateState(State state_); std::string getException() const; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 371a23f5a66..4d921003e04 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -359,41 +359,38 @@ ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t proc ObjectStorageQueueSource::ObjectStorageQueueSource( String name_, size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, + std::shared_ptr file_iterator_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const ReadFromFormatInfo & read_from_format_info_, + const std::optional & format_settings_, + const ObjectStorageQueueSettings & queue_settings_, std::shared_ptr files_metadata_, - const ObjectStorageQueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, + size_t max_block_size_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_, - size_t max_processed_files_before_commit_, - size_t max_processed_rows_before_commit_, - size_t max_processed_bytes_before_commit_, - size_t max_processing_time_sec_before_commit_, bool commit_once_processed_) - : ISource(header_) + : ISource(read_from_format_info_.source_header) , WithContext(context_) , name(std::move(name_)) , processor_id(processor_id_) - , action(action_) + , file_iterator(file_iterator_) + , configuration(configuration_) + , object_storage(object_storage_) + , read_from_format_info(read_from_format_info_) + , format_settings(format_settings_) + , queue_settings(queue_settings_) , files_metadata(files_metadata_) - , internal_source(std::move(internal_source_)) - , requested_virtual_columns(requested_virtual_columns_) + , max_block_size(max_block_size_) , shutdown_called(shutdown_called_) , table_is_being_dropped(table_is_being_dropped_) , system_queue_log(system_queue_log_) , storage_id(storage_id_) - , max_processed_files_before_commit(max_processed_files_before_commit_) - , max_processed_rows_before_commit(max_processed_rows_before_commit_) - , max_processed_bytes_before_commit(max_processed_bytes_before_commit_) - , max_processing_time_sec_before_commit(max_processing_time_sec_before_commit_) , commit_once_processed(commit_once_processed_) - , remove_file_func(remove_file_func_) , log(log_) { } @@ -403,21 +400,6 @@ String ObjectStorageQueueSource::getName() const return name; } -void ObjectStorageQueueSource::lazyInitialize(size_t processor) -{ - if (initialized) - return; - - LOG_TEST(log, "Initializing a new reader"); - - internal_source->lazyInitialize(processor); - reader = std::move(internal_source->reader); - if (reader) - reader_future = std::move(internal_source->reader_future); - - initialized = true; -} - Chunk 
ObjectStorageQueueSource::generate() { Chunk chunk; @@ -442,19 +424,33 @@ Chunk ObjectStorageQueueSource::generate() Chunk ObjectStorageQueueSource::generateImpl() { - lazyInitialize(processor_id); - while (true) { if (!reader) { - LOG_TEST(log, "No reader"); - break; + if (shutdown_called) + { + LOG_TEST(log, "Shutdown called"); + break; + } + + const auto context = getContext(); + reader = StorageObjectStorageSource::createReader( + processor_id, file_iterator, configuration, object_storage, read_from_format_info, + format_settings, nullptr, context, nullptr, log, max_block_size, + context->getSettingsRef().max_parsing_threads.value, /* need_only_count */false); + + if (!reader) + { + LOG_TEST(log, "No reader"); + break; + } } const auto * object_info = dynamic_cast(reader.getObjectInfo().get()); auto file_metadata = object_info->file_metadata; auto file_status = file_metadata->getFileStatus(); + const auto & path = reader.getObjectInfo()->getPath(); if (isCancelled()) { @@ -477,8 +473,6 @@ Chunk ObjectStorageQueueSource::generateImpl() break; } - const auto & path = reader.getObjectInfo()->getPath(); - if (shutdown_called) { LOG_TEST(log, "Shutdown called"); @@ -526,7 +520,7 @@ Chunk ObjectStorageQueueSource::generateImpl() total_processed_bytes += chunk.bytes(); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( - chunk, requested_virtual_columns, + chunk, read_from_format_info.requested_virtual_columns, { .path = path, .size = reader.getObjectInfo()->metadata->size_bytes @@ -545,9 +539,6 @@ Chunk ObjectStorageQueueSource::generateImpl() if (processed_rows_from_file == 0) { - auto * file_iterator = dynamic_cast(internal_source->file_iterator.get()); - chassert(file_iterator); - if (file_status->retries < file_metadata->getMaxTries()) file_iterator->returnForRetry(reader.getObjectInfo()); @@ -562,11 +553,13 @@ Chunk ObjectStorageQueueSource::generateImpl() file_status->setProcessingEndTime(); file_status.reset(); + reader = {}; processed_rows_from_file = 0; processed_files.push_back(file_metadata); - if (processed_files.size() == max_processed_files_before_commit) + if (queue_settings.max_processed_files_before_commit + && processed_files.size() == queue_settings.max_processed_files_before_commit) { LOG_TRACE(log, "Number of max processed files before commit reached " "(rows: {}, bytes: {}, files: {})", @@ -574,68 +567,30 @@ Chunk ObjectStorageQueueSource::generateImpl() break; } - bool rows_or_bytes_or_time_limit_reached = false; - if (max_processed_rows_before_commit - && total_processed_rows == max_processed_rows_before_commit) + if (queue_settings.max_processed_rows_before_commit + && total_processed_rows == queue_settings.max_processed_rows_before_commit) { LOG_TRACE(log, "Number of max processed rows before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; + break; } - else if (max_processed_bytes_before_commit - && total_processed_bytes == max_processed_bytes_before_commit) + else if (queue_settings.max_processed_bytes_before_commit + && total_processed_bytes == queue_settings.max_processed_bytes_before_commit) { LOG_TRACE(log, "Number of max processed bytes before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; + break; } - else if (max_processing_time_sec_before_commit - && total_stopwatch.elapsedSeconds() >= 
max_processing_time_sec_before_commit) + else if (queue_settings.max_processing_time_sec_before_commit + && total_stopwatch.elapsedSeconds() >= queue_settings.max_processing_time_sec_before_commit) { LOG_TRACE(log, "Max processing time before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; - } - - if (rows_or_bytes_or_time_limit_reached) - { - if (!reader_future.valid()) - break; - - LOG_TRACE(log, "Rows or bytes limit reached, but we have one more file scheduled already, " - "will process it despite the limit"); - } - - if (shutdown_called) - { - LOG_TRACE(log, "Shutdown was called, stopping sync"); break; } - - chassert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - { - LOG_TEST(log, "Reader finished"); - break; - } - - file_status = files_metadata->getFileStatus(reader.getObjectInfo()->getPath()); - - if (!rows_or_bytes_or_time_limit_reached && processed_files.size() + 1 < max_processed_files_before_commit) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - internal_source->create_reader_pool->wait(); - reader_future = internal_source->createReaderAsync(processor_id); - } } return {}; @@ -679,12 +634,11 @@ void ObjectStorageQueueSource::commit(bool success, const std::string & exceptio void ObjectStorageQueueSource::applyActionAfterProcessing(const String & path) { - switch (action) + switch (queue_settings.after_processing.value) { case ObjectStorageQueueAction::DELETE: { - assert(remove_file_func); - remove_file_func(path); + object_storage->removeObject(StoredObject(path)); break; } case ObjectStorageQueueAction::KEEP: diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index ccd87e8a269..0f3d0ab2e92 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -21,7 +21,6 @@ class ObjectStorageQueueSource : public ISource, WithContext public: using Storage = StorageObjectStorage; using Source = StorageObjectStorageSource; - using RemoveFileFunc = std::function; using BucketHolderPtr = ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr; using BucketHolder = ObjectStorageQueueOrderedFileMetadata::BucketHolder; @@ -97,22 +96,20 @@ public: ObjectStorageQueueSource( String name_, size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, + std::shared_ptr file_iterator_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const ReadFromFormatInfo & read_from_format_info_, + const std::optional & format_settings_, + const ObjectStorageQueueSettings & queue_settings_, std::shared_ptr files_metadata_, - const ObjectStorageQueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, + size_t max_block_size_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_, - size_t max_processed_files_before_commit_, - size_t max_processed_rows_before_commit_, - size_t max_processed_bytes_before_commit_, - size_t max_processing_time_sec_before_commit_, bool commit_once_processed_); static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); @@ 
-128,29 +125,27 @@ public: private: const String name; const size_t processor_id; - const ObjectStorageQueueAction action; + const std::shared_ptr file_iterator; + const ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + const ReadFromFormatInfo read_from_format_info; + const std::optional format_settings; + const ObjectStorageQueueSettings queue_settings; const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; - const NamesAndTypesList requested_virtual_columns; + const size_t max_block_size; + const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; const std::shared_ptr system_queue_log; const StorageID storage_id; - const size_t max_processed_files_before_commit; - const size_t max_processed_rows_before_commit; - const size_t max_processed_bytes_before_commit; - const size_t max_processing_time_sec_before_commit; const bool commit_once_processed; - RemoveFileFunc remove_file_func; LoggerPtr log; std::vector processed_files; std::vector failed_during_read_files; Source::ReaderHolder reader; - std::future reader_future; - std::atomic initialized{false}; size_t processed_rows_from_file = 0; size_t total_processed_rows = 0; @@ -165,8 +160,6 @@ private: ObjectStorageQueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); - - void lazyInitialize(size_t processor); }; } diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 95265cde9ea..4388864434e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -352,43 +352,14 @@ std::shared_ptr StorageObjectStorageQueue::createSourc ContextPtr local_context, bool commit_once_processed) { - auto internal_source = std::make_unique( - getName(), - object_storage, - configuration, - info, - format_settings, - local_context, - max_block_size, - file_iterator, - local_context->getSettingsRef().max_download_threads, - false); - - auto file_deleter = [=, this](const std::string & path) mutable - { - object_storage->removeObject(StoredObject(path)); - }; - return std::make_shared( - getName(), - processor_id, - info.source_header, - std::move(internal_source), - files_metadata, - queue_settings->after_processing, - file_deleter, - info.requested_virtual_columns, - local_context, - shutdown_called, - table_is_being_dropped, + getName(), processor_id, + file_iterator, configuration, object_storage, + info, format_settings, + *queue_settings, files_metadata, + local_context, max_block_size, shutdown_called, table_is_being_dropped, getQueueLog(object_storage, local_context, *queue_settings), - getStorageID(), - log, - queue_settings->max_processed_files_before_commit, - queue_settings->max_processed_rows_before_commit, - queue_settings->max_processed_bytes_before_commit, - queue_settings->max_processing_time_sec_before_commit, - commit_once_processed); + getStorageID(), log, commit_once_processed); } bool StorageObjectStorageQueue::hasDependencies(const StorageID & table_id) diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h index 758721674fe..fc459c45f74 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h @@ -79,6 +79,7 @@ private: void drop() override; bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool 
supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool supportsDynamicSubcolumns() const override { return true; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index fed0bd61c03..28e75c6d244 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,15 +1,14 @@ -#include -#include - #include #include -#include -#include +#include +#include #include #include #include #include #include +#include + namespace DB { @@ -20,32 +19,57 @@ namespace ErrorCodes extern const int INCORRECT_QUERY; } -/// Version / bitmask of statistics / data of statistics / enum StatisticsFileVersion : UInt16 { V0 = 0, }; -IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) {} +IStatistics::IStatistics(const SingleStatisticsDescription & stat_) + : stat(stat_) +{ +} ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) - : stats_desc(stats_desc_), rows(0) + : stats_desc(stats_desc_) { } void ColumnStatistics::update(const ColumnPtr & column) { rows += column->size(); - for (const auto & iter : stats) - { - iter.second->update(column); - } + for (const auto & stat : stats) + stat.second->update(column); } +UInt64 IStatistics::estimateCardinality() const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics"); +} + +Float64 IStatistics::estimateEqual(Float64 /*val*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics"); +} + +Float64 IStatistics::estimateLess(Float64 /*val*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics"); +} + +/// ------------------------------------- +/// Implementation of the estimation: +/// Note: Each statistics object supports certain types predicates natively, e.g. +/// - TDigest: '< X' (less-than predicates) +/// - Count-min sketches: '= X' (equal predicates) +/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality) +/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way. +/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics +/// object that supports it natively. + Float64 ColumnStatistics::estimateLess(Float64 val) const { if (stats.contains(StatisticsType::TDigest)) - return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); + return stats.at(StatisticsType::TDigest)->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } @@ -58,14 +82,9 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const { if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { - auto uniq_static = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); - /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) - /// for every bucket. 
- if (uniq_static->getCardinality() < 2048) - { - auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); - return tdigest_static->estimateEqual(val); - } + /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket. + if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048) + return stats.at(StatisticsType::TDigest)->estimateEqual(val); } if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold) return rows * ConditionSelectivityEstimator::default_normal_cond_factor; @@ -73,17 +92,22 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const return rows * ConditionSelectivityEstimator::default_good_cond_factor; } +/// ------------------------------------- + void ColumnStatistics::serialize(WriteBuffer & buf) { writeIntBinary(V0, buf); + UInt64 stat_types_mask = 0; for (const auto & [type, _]: stats) stat_types_mask |= 1 << UInt8(type); writeIntBinary(stat_types_mask, buf); - /// We write some basic statistics + + /// as the column row count is always useful, save it in any case writeIntBinary(rows, buf); - /// We write complex statistics - for (const auto & [type, stat_ptr]: stats) + + /// write the actual statistics object + for (const auto & [type, stat_ptr] : stats) stat_ptr->serialize(buf); } @@ -96,7 +120,9 @@ void ColumnStatistics::deserialize(ReadBuffer &buf) UInt64 stat_types_mask = 0; readIntBinary(stat_types_mask, buf); + readIntBinary(rows, buf); + for (auto it = stats.begin(); it != stats.end();) { if (!(stat_types_mask & 1 << UInt8(it->first))) @@ -136,15 +162,15 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va { if (!validators.emplace(stats_type, std::move(validator)).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticsFactory: the statistics validator type {} is not unique", stats_type); - } MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerCreator(StatisticsType::TDigest, TDigestCreator); - registerCreator(StatisticsType::Uniq, UniqCreator); registerValidator(StatisticsType::TDigest, TDigestValidator); + registerCreator(StatisticsType::TDigest, TDigestCreator); + registerValidator(StatisticsType::Uniq, UniqValidator); + registerCreator(StatisticsType::Uniq, UniqCreator); } MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance() @@ -159,9 +185,7 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st { auto it = validators.find(type); if (it == validators.end()) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown Statistic type '{}'", type); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistic type '{}'", type); it->second(desc, data_type); } } @@ -173,10 +197,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri { auto it = creators.find(type); if (it == creators.end()) - { - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Unknown Statistic type '{}'. Available types: tdigest, uniq", type); - } + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. 
Available types: 'tdigest', 'uniq'", type); auto stat_ptr = (it->second)(desc, stats.data_type); column_stat->stats[type] = stat_ptr; } diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 2ab1337af02..d4364075d1c 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -1,19 +1,15 @@ #pragma once -#include -#include - #include -#include #include #include #include +#include namespace DB { -/// this is for user-defined statistic. constexpr auto STATS_FILE_PREFIX = "statistics_"; constexpr auto STATS_FILE_SUFFIX = ".stats"; @@ -25,14 +21,21 @@ class IStatistics { public: explicit IStatistics(const SingleStatisticsDescription & stat_); - virtual ~IStatistics() = default; - virtual void serialize(WriteBuffer & buf) = 0; + virtual void update(const ColumnPtr & column) = 0; + virtual void serialize(WriteBuffer & buf) = 0; virtual void deserialize(ReadBuffer & buf) = 0; - virtual void update(const ColumnPtr & column) = 0; + /// Estimate the cardinality of the column. + /// Throws if the statistics object is not able to do a meaningful estimation. + virtual UInt64 estimateCardinality() const; + + /// Per-value estimations. + /// Throws if the statistics object is not able to do a meaningful estimation. + virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column + virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column protected: SingleStatisticsDescription stat; @@ -43,11 +46,12 @@ using StatisticsPtr = std::shared_ptr; class ColumnStatistics { public: - explicit ColumnStatistics(const ColumnStatisticsDescription & stats_); + explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_); + void serialize(WriteBuffer & buf); void deserialize(ReadBuffer & buf); - String getFileName() const; + String getFileName() const; const String & columnName() const; UInt64 rowCount() const; @@ -55,17 +59,14 @@ public: void update(const ColumnPtr & column); Float64 estimateLess(Float64 val) const; - Float64 estimateGreater(Float64 val) const; - Float64 estimateEqual(Float64 val) const; private: - friend class MergeTreeStatisticsFactory; ColumnStatisticsDescription stats_desc; std::map stats; - UInt64 rows; /// the number of rows of the column + UInt64 rows = 0; /// the number of rows in the column }; class ColumnsDescription; @@ -79,25 +80,23 @@ public: void validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const; + using Validator = std::function; using Creator = std::function; - using Validator = std::function; - ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const; - ColumnsStatistics getMany(const ColumnsDescription & columns) const; - void registerCreator(StatisticsType type, Creator creator); void registerValidator(StatisticsType type, Validator validator); + void registerCreator(StatisticsType type, Creator creator); protected: MergeTreeStatisticsFactory(); private: - using Creators = std::unordered_map; using Validators = std::unordered_map; - Creators creators; + using Creators = std::unordered_map; Validators validators; + Creators creators; }; } diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp new file mode 100644 index 00000000000..0747197370c --- /dev/null +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -0,0 +1,60 @@ +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_STATISTICS; +} + 
+StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) + : IStatistics(stat_) +{ +} + +void StatisticsTDigest::update(const ColumnPtr & column) +{ + size_t rows = column->size(); + + for (size_t row = 0; row < rows; ++row) + { + /// TODO: support more types. + Float64 value = column->getFloat64(row); + t_digest.add(value, 1); + } +} + +void StatisticsTDigest::serialize(WriteBuffer & buf) +{ + t_digest.serialize(buf); +} + +void StatisticsTDigest::deserialize(ReadBuffer & buf) +{ + t_digest.deserialize(buf); +} + +Float64 StatisticsTDigest::estimateLess(Float64 val) const +{ + return t_digest.getCountLessThan(val); +} + +Float64 StatisticsTDigest::estimateEqual(Float64 val) const +{ + return t_digest.getCountEqual(val); +} + +void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + if (!data_type->isValueRepresentedByNumber()) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); +} + +StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) +{ + return std::make_shared(stat); +} + +} diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/StatisticsTDigest.h similarity index 63% rename from src/Storages/Statistics/TDigestStatistics.h rename to src/Storages/Statistics/StatisticsTDigest.h index 7c361b8751f..d3a3bf115ee 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -6,27 +6,24 @@ namespace DB { - -/// TDigestStatistic is a kind of histogram. -class TDigestStatistics : public IStatistics +class StatisticsTDigest : public IStatistics { public: - explicit TDigestStatistics(const SingleStatisticsDescription & stat_); - - Float64 estimateLess(Float64 val) const; - - Float64 estimateEqual(Float64 val) const; - - void serialize(WriteBuffer & buf) override; - - void deserialize(ReadBuffer & buf) override; + explicit StatisticsTDigest(const SingleStatisticsDescription & stat_); void update(const ColumnPtr & column) override; + + void serialize(WriteBuffer & buf) override; + void deserialize(ReadBuffer & buf) override; + + Float64 estimateLess(Float64 val) const override; + Float64 estimateEqual(Float64 val) const override; + private: - QuantileTDigest data; + QuantileTDigest t_digest; }; -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/StatisticsUniq.cpp similarity index 76% rename from src/Storages/Statistics/UniqStatistics.cpp rename to src/Storages/Statistics/StatisticsUniq.cpp index fc748e769ca..bf9a40ea8cb 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -10,7 +10,7 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) +StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) : IStatistics(stat_) { arena = std::make_unique(); @@ -20,29 +20,12 @@ UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const collector->create(data); } 
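For context, this is the user-facing side of the statistics objects introduced above, as a minimal SQL sketch. The STATISTICS clause matches the two types registered in this change ('tdigest' and 'uniq'); the two setting names are assumptions for this experimental feature and may differ between releases:

    SET allow_experimental_statistics = 1;  -- assumed setting name
    SET allow_statistics_optimize = 1;      -- assumed setting name

    CREATE TABLE tab
    (
        a Int64 STATISTICS(tdigest, uniq),  -- 'tdigest' serves range/equality estimates, 'uniq' serves cardinality
        b Float64 STATISTICS(tdigest)
    )
    ENGINE = MergeTree ORDER BY a;

    -- Predicates these estimators serve:
    SELECT count() FROM tab WHERE a < 100 AND b = 42.5;
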
-UniqStatistics::~UniqStatistics() +StatisticsUniq::~StatisticsUniq() { collector->destroy(data); } -UInt64 UniqStatistics::getCardinality() -{ - auto column = DataTypeUInt64().createColumn(); - collector->insertResultInto(data, *column, nullptr); - return column->getUInt(0); -} - -void UniqStatistics::serialize(WriteBuffer & buf) -{ - collector->serialize(data, buf); -} - -void UniqStatistics::deserialize(ReadBuffer & buf) -{ - collector->deserialize(data, buf); -} - -void UniqStatistics::update(const ColumnPtr & column) +void StatisticsUniq::update(const ColumnPtr & column) { /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. /// Here we intend to avoid crash in CI. @@ -51,16 +34,33 @@ void UniqStatistics::update(const ColumnPtr & column) collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); } +void StatisticsUniq::serialize(WriteBuffer & buf) +{ + collector->serialize(data, buf); +} + +void StatisticsUniq::deserialize(ReadBuffer & buf) +{ + collector->deserialize(data, buf); +} + +UInt64 StatisticsUniq::estimateCardinality() const +{ + auto column = DataTypeUInt64().createColumn(); + collector->insertResultInto(data, *column, nullptr); + return column->getUInt(0); +} + void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' does not support type {}", data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(stat, data_type); } } diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/StatisticsUniq.h similarity index 77% rename from src/Storages/Statistics/UniqStatistics.h rename to src/Storages/Statistics/StatisticsUniq.h index 0d86a6e458a..5290585bd94 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -7,30 +7,27 @@ namespace DB { -class UniqStatistics : public IStatistics +class StatisticsUniq : public IStatistics { public: - UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); - - ~UniqStatistics() override; - - UInt64 getCardinality(); - - void serialize(WriteBuffer & buf) override; - - void deserialize(ReadBuffer & buf) override; + StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); + ~StatisticsUniq() override; void update(const ColumnPtr & column) override; -private: + void serialize(WriteBuffer & buf) override; + void deserialize(ReadBuffer & buf) override; + UInt64 estimateCardinality() const override; + +private: std::unique_ptr arena; AggregateFunctionPtr collector; AggregateDataPtr data; }; -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); } diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/TDigestStatistics.cpp deleted file mode 100644 index aa5662c979d..00000000000 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ /dev/null @@ -1,60 +0,0 @@ 
-#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int ILLEGAL_STATISTICS; -} - -TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_): - IStatistics(stat_) -{ -} - -Float64 TDigestStatistics::estimateLess(Float64 val) const -{ - return data.getCountLessThan(val); -} - -Float64 TDigestStatistics::estimateEqual(Float64 val) const -{ - return data.getCountEqual(val); -} - -void TDigestStatistics::serialize(WriteBuffer & buf) -{ - data.serialize(buf); -} - -void TDigestStatistics::deserialize(ReadBuffer & buf) -{ - data.deserialize(buf); -} - -void TDigestStatistics::update(const ColumnPtr & column) -{ - size_t size = column->size(); - - for (size_t i = 0; i < size; ++i) - { - /// TODO: support more types. - Float64 value = column->getFloat64(i); - data.add(value, 1); - } -} - -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) -{ - return std::make_shared(stat); -} - -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) -{ - data_type = removeNullable(data_type); - if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' does not support type {}", data_type->getName()); -} - -} diff --git a/src/Storages/Statistics/tests/gtest_stats.cpp b/src/Storages/Statistics/tests/gtest_stats.cpp index f94f310be56..c3c14632ba1 100644 --- a/src/Storages/Statistics/tests/gtest_stats.cpp +++ b/src/Storages/Statistics/tests/gtest_stats.cpp @@ -1,6 +1,6 @@ #include -#include +#include TEST(Statistics, TDigestLessThan) { diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ac094aeb489..f955889185c 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -89,6 +89,7 @@ public: bool supportsSubsetOfColumns(const ContextPtr & context) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool supportsDynamicSubcolumns() const override { return true; } diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index f5a4362901e..9549f3a035c 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -27,15 +27,8 @@ public: const ConstraintsDescription & constraints_); std::string getName() const override { return "FileCluster"; } - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - - bool supportsDynamicSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - private: void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp new file mode 100644 index 00000000000..6e8f425f8dc --- /dev/null +++ b/src/Storages/StorageFuzzQuery.cpp @@ -0,0 +1,169 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +ColumnPtr FuzzQuerySource::createColumn() +{ + auto column = ColumnString::create(); + ColumnString::Chars & data_to = column->getChars(); + ColumnString::Offsets & offsets_to = 
column->getOffsets(); + + offsets_to.resize(block_size); + IColumn::Offset offset = 0; + + auto fuzz_base = query; + size_t row_num = 0; + + while (row_num < block_size) + { + ASTPtr new_query = fuzz_base->clone(); + + auto base_before_fuzz = fuzz_base->formatForErrorMessage(); + fuzzer.fuzzMain(new_query); + auto fuzzed_text = new_query->formatForErrorMessage(); + + if (base_before_fuzz == fuzzed_text) + continue; + + /// The fuzzed query has become too long; start again from the original query. + if (fuzzed_text.size() > config.max_query_length) + { + fuzz_base = query; + continue; + } + + IColumn::Offset next_offset = offset + fuzzed_text.size() + 1; + data_to.resize(next_offset); + + std::copy(fuzzed_text.begin(), fuzzed_text.end(), &data_to[offset]); + + data_to[offset + fuzzed_text.size()] = 0; + offsets_to[row_num] = next_offset; + + offset = next_offset; + fuzz_base = new_query; + ++row_num; + } + + return column; +} + +StorageFuzzQuery::StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_) : IStorage(table_id_), config(config_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment_); + setInMemoryMetadata(storage_metadata); +} + +Pipe StorageFuzzQuery::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + storage_snapshot->check(column_names); + + Pipes pipes; + pipes.reserve(num_streams); + + const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); + Block block_header; + for (const auto & name : column_names) + { + const auto & name_type = our_columns.get(name); + MutableColumnPtr column = name_type.type->createColumn(); + block_header.insert({std::move(column), name_type.type, name_type.name}); + } + + const char * begin = config.query.data(); + const char * end = begin + config.query.size(); + + ParserQuery parser(end, false); + auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + for (UInt64 i = 0; i < num_streams; ++i) + pipes.emplace_back(std::make_shared(max_block_size, block_header, config, query)); + + return Pipe::unitePipes(std::move(pipes)); +} + +StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) +{ + StorageFuzzQuery::Configuration configuration{}; + + // Supported signatures: + // + // FuzzQuery(query) + // FuzzQuery(query, max_query_length) + // FuzzQuery(query, max_query_length, random_seed) + if (engine_args.empty() || engine_args.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 3 arguments: query, max_query_length, random_seed"); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); + configuration.query = std::move(first_arg); + + if (engine_args.size() >= 2) + { + const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.max_query_length = checkAndGetLiteralArgument(literal, "max_query_length"); + } + + if (engine_args.size() == 3) + { + const auto & literal = engine_args[2]->as(); + if (!literal.value.isNull()) + configuration.random_seed = 
checkAndGetLiteralArgument(literal, "random_seed"); + } + + return configuration; +} + +void registerStorageFuzzQuery(StorageFactory & factory) +{ + factory.registerStorage( + "FuzzQuery", + [](const StorageFactory::Arguments & args) -> std::shared_ptr + { + ASTs & engine_args = args.engine_args; + + if (engine_args.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzQuery must have arguments."); + + StorageFuzzQuery::Configuration configuration = StorageFuzzQuery::getConfiguration(engine_args, args.getLocalContext()); + + for (const auto& col : args.columns) + if (col.type->getTypeId() != TypeIndex::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzQuery' supports only columns of String type, got {}.", col.type->getName()); + + return std::make_shared(args.table_id, args.columns, args.comment, configuration); + }); +} + +} diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h new file mode 100644 index 00000000000..125ef960e74 --- /dev/null +++ b/src/Storages/StorageFuzzQuery.h @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class NamedCollection; + +class StorageFuzzQuery final : public IStorage +{ +public: + struct Configuration : public StatelessTableEngineConfiguration + { + String query; + UInt64 max_query_length = 500; + UInt64 random_seed = randomSeed(); + }; + + StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_); + + std::string getName() const override { return "FuzzQuery"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + +private: + const Configuration config; +}; + + +class FuzzQuerySource : public ISource +{ +public: + FuzzQuerySource( + UInt64 block_size_, Block block_header_, const StorageFuzzQuery::Configuration & config_, ASTPtr query_) + : ISource(block_header_) + , block_size(block_size_) + , block_header(std::move(block_header_)) + , config(config_) + , query(query_) + , fuzzer(config_.random_seed) + { + } + + String getName() const override { return "FuzzQuery"; } + +protected: + Chunk generate() override + { + Columns columns; + columns.reserve(block_header.columns()); + for (const auto & col : block_header) + { + chassert(col.type->getTypeId() == TypeIndex::String); + columns.emplace_back(createColumn()); + } + + return {std::move(columns), block_size}; + } + +private: + ColumnPtr createColumn(); + + UInt64 block_size; + Block block_header; + + StorageFuzzQuery::Configuration config; + ASTPtr query; + + QueryFuzzer fuzzer; +}; + +} diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index f27a76dc0dd..47e41cccc96 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -395,11 +395,14 @@ void registerStorageJoin(StorageFactory & factory) else if (kind_str == "full") { if (strictness == JoinStrictness::Any) - strictness = JoinStrictness::RightAny; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ANY FULL JOINs are not implemented"); kind = JoinKind::Full; } } + if ((strictness == JoinStrictness::Semi || strictness == JoinStrictness::Anti) && (kind != JoinKind::Left && kind != 
JoinKind::Right)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "SEMI|ANTI JOIN should be LEFT or RIGHT"); + if (kind == JoinKind::Comma) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes).");
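For context, the StorageJoin change above tightens validation of the Join engine's strictness/kind pair. A minimal SQL sketch of the new behavior (table and column names are illustrative):

    CREATE TABLE j_ok  (k UInt64, v String) ENGINE = Join(SEMI, LEFT, k);  -- accepted: SEMI/ANTI require LEFT or RIGHT
    CREATE TABLE j_bad (k UInt64, v String) ENGINE = Join(SEMI, FULL, k);  -- rejected with BAD_ARGUMENTS
    CREATE TABLE j_any (k UInt64, v String) ENGINE = Join(ANY, FULL, k);   -- now NOT_IMPLEMENTED (previously silently downgraded to RightAny)
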
diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 316f398b476..ec1559b71a4 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -161,6 +161,7 @@ StorageMaterializedView::StorageMaterializedView( manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); manual_create_query->uuid = query.to_inner_uuid; + manual_create_query->has_uuid = query.to_inner_uuid != UUIDHelpers::Nil; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 3090f8db12e..fa7cc6eeeef 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -296,6 +296,7 @@ public: } bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool supportsDynamicSubcolumns() const override { return true; } diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index a6334e7430d..31bffa06210 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -30,15 +30,8 @@ public: const StorageURL::Configuration & configuration_); std::string getName() const override { return "URLCluster"; } - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - - bool supportsDynamicSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - private: void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 628e5a85437..9f849052071 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -26,6 +26,7 @@ void registerStorageGenerateRandom(StorageFactory & factory); void registerStorageExecutable(StorageFactory & factory); void registerStorageWindowView(StorageFactory & factory); void registerStorageLoop(StorageFactory & factory); +void registerStorageFuzzQuery(StorageFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerStorageFuzzJSON(StorageFactory & factory); #endif @@ -123,6 +124,7 @@ void registerStorages() registerStorageExecutable(factory); registerStorageWindowView(factory); registerStorageLoop(factory); + registerStorageFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerStorageFuzzJSON(factory); #endif diff --git a/src/TableFunctions/TableFunctionFuzzQuery.cpp b/src/TableFunctions/TableFunctionFuzzQuery.cpp new file mode 100644 index 00000000000..224f6666556 --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.cpp @@ -0,0 +1,54 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +void TableFunctionFuzzQuery::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName()); + + auto args = args_func.at(0)->children; + configuration = StorageFuzzQuery::getConfiguration(args, context); +} + +StoragePtr TableFunctionFuzzQuery::executeImpl( + const ASTPtr & /*ast_function*/, + ContextPtr context, + const std::string & table_name, + ColumnsDescription /*cached_columns*/, + bool is_insert_query) const +{ + ColumnsDescription columns = getActualTableStructure(context, is_insert_query); + auto res = std::make_shared( + StorageID(getDatabaseName(), table_name), + columns, + /* comment */ String{}, + configuration); + res->startup(); + return res; +} + +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory) +{ + factory.registerFunction( + {.documentation + = {.description = "Perturbs a query string with random variations.", + .returned_value = "A table object with a single column containing perturbed query strings."}, + .allow_readonly = true}); +} + +} diff --git a/src/TableFunctions/TableFunctionFuzzQuery.h b/src/TableFunctions/TableFunctionFuzzQuery.h new file mode 100644 index 00000000000..22d10341c4d --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class TableFunctionFuzzQuery : public ITableFunction +{ +public: + static constexpr auto name = "fuzzQuery"; + std::string getName() const override { return name; } + + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + ColumnsDescription getActualTableStructure(ContextPtr /* context */, bool /* is_insert_query */) const override + { + return ColumnsDescription{{"query", std::make_shared()}}; + } + +private: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return "fuzzQuery"; } + + String source; + std::optional random_seed; + StorageFuzzQuery::Configuration configuration; +}; + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index ca4913898f9..a6c90872f12 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -26,6 +26,7 @@ void registerTableFunctions() registerTableFunctionMongoDB(factory); registerTableFunctionRedis(factory); registerTableFunctionMergeTreeIndex(factory); + registerTableFunctionFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerTableFunctionFuzzJSON(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index efde4d6dcdc..2a8864a9bfd 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -23,6 +23,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory); void registerTableFunctionMongoDB(TableFunctionFactory & factory); void registerTableFunctionRedis(TableFunctionFactory & factory); void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory); +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerTableFunctionFuzzJSON(TableFunctionFactory & factory); #endif
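With the storage and the table function registered, usage looks as follows; a minimal sketch derived from the signatures in StorageFuzzQuery::getConfiguration above (the single result column is named `query`, per getActualTableStructure):

    -- fuzzQuery(query [, max_query_length [, random_seed]])
    SELECT * FROM fuzzQuery('SELECT 1', 500, 8956) LIMIT 5;

    -- The same configuration drives the engine form; the column must be of type String:
    CREATE TABLE fuzz (query String) ENGINE = FuzzQuery('SELECT 1', 500, 8956);
    SELECT * FROM fuzz LIMIT 5;

diff --git 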
a/src/configure_config.cmake b/src/configure_config.cmake index a3f6dae4b87..75f61baa854 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -170,5 +170,8 @@ endif() if (TARGET ch_contrib::pocketfft) set(USE_POCKETFFT 1) endif() +if (TARGET ch_contrib::prometheus_protobufs) + set(USE_PROMETHEUS_PROTOBUFS 1) +endif() set(SOURCE_DIR ${PROJECT_SOURCE_DIR}) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 87f721cfde7..21f16d995a4 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -434,7 +434,14 @@ class ClickhouseIntegrationTestsRunner: "Getting all tests to the file %s with cmd: \n%s", out_file_full, cmd ) with open(out_file_full, "wb") as ofd: - subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) + try: + subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) + except subprocess.CalledProcessError as ex: + print("ERROR: Setting test plan failed. Output:") + with open(out_file_full, "r", encoding="utf-8") as file: + for line in file: + print(" " + line, end="") + raise ex all_tests = set() with open(out_file_full, "r", encoding="utf-8") as all_tests_fd: diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index bf0281cae68..486bfc25e22 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -30,6 +30,9 @@ def get_additional_envs(check_name: str) -> List[str]: if "azure" in check_name: result.append("USE_AZURE_STORAGE_FOR_MERGE_TREE=1") + if "s3" in check_name: + result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1") + return result diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index 8251ccbaf38..1b71231f820 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -101,23 +101,20 @@ def main(): assert pr_info.merged_pr, "BUG. 
merged PR number could not be determined" prs = gh.get_pulls_from_search( - query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr", + query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr is:open", repo="ClickHouse/clickhouse-private", ) - sync_pr = None - if len(prs) > 1: print(f"WARNING: More than one PR found [{prs}] - exiting") elif len(prs) == 0: print("WARNING: No Sync PR found") else: sync_pr = prs[0] - - if args.merge: - merge_sync_pr(gh, sync_pr) - elif args.status: - set_sync_status(gh, pr_info, sync_pr) + if args.merge: + merge_sync_pr(gh, sync_pr) + elif args.status: + set_sync_status(gh, pr_info, sync_pr) if __name__ == "__main__": diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 36870d59c3a..8486e3a885f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -700,7 +700,9 @@ class FailureReason(enum.Enum): NO_LONG = "not running long tests" REPLICATED_DB = "replicated-database" NON_ATOMIC_DB = "database engine not Atomic" + OBJECT_STORAGE = "object-storage" S3_STORAGE = "s3-storage" + AZURE_BLOB_STORAGE = "azure-blob-storage" BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth is not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" @@ -1226,13 +1228,21 @@ class TestCase: elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE + elif tags and ("no-azure-blob-storage" in tags) and args.azure_blob_storage: + return FailureReason.AZURE_BLOB_STORAGE elif ( tags - and "no-s3-storage-with-slow-build" in tags - and args.s3_storage + and ("no-object-storage" in tags) + and (args.azure_blob_storage or args.s3_storage) + ): + return FailureReason.OBJECT_STORAGE + elif ( + tags + and "no-object-storage-with-slow-build" in tags + and (args.s3_storage or args.azure_blob_storage) and BuildFlags.RELEASE not in args.build_flags ): - return FailureReason.S3_STORAGE + return FailureReason.OBJECT_STORAGE elif tags: for build_flag in args.build_flags: @@ -3099,6 +3109,12 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) + parser.add_argument( + "--azure-blob-storage", + action="store_true", + default=False, + help="Run tests over azure blob storage", + ) parser.add_argument( "--no-random-settings", action="store_true", default=False, help="Run tests over s3 storage", ) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 544b06cca1b..34f5c28fef8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. 
# This means that this minimum needs to be at least 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" # to create docker-compose env file diff --git a/tests/integration/test_analyzer_compatibility/__init__.py b/tests/integration/test_analyzer_compatibility/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml new file mode 100644 index 00000000000..0a50dab7fd3 --- /dev/null +++ b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + true + + current + 9000 + + + backward + 9000 + + + + + diff --git a/tests/integration/test_analyzer_compatibility/test.py b/tests/integration/test_analyzer_compatibility/test.py new file mode 100644 index 00000000000..d4ded420c61 --- /dev/null +++ b/tests/integration/test_analyzer_compatibility/test.py @@ -0,0 +1,100 @@ +import uuid + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT = "24.2" + +cluster = ClickHouseCluster(__file__) +# Here analyzer is enabled by default +current = cluster.add_instance( + "current", + main_configs=["configs/remote_servers.xml"], +) +# Here analyzer is disabled by default +backward = cluster.add_instance( + "backward", + use_old_analyzer=True, + main_configs=["configs/remote_servers.xml"], + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT, + with_installed_binary=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_two_new_versions(start_cluster): + # Two new versions (both know about the analyzer). + # One has it enabled by default, the other one disabled. 
+ + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + query_id = str(uuid.uuid4()) + current.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", + query_id=query_id, + ) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert ( + current.query( + """ +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""" + ) + == TSV([["backward", "true"], ["current", "true"]]) + ) + + # Should be enabled everywhere + analyzer_enabled = current.query( + f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""" + ) + + assert TSV(analyzer_enabled) == TSV("1") + + query_id = str(uuid.uuid4()) + backward.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", + query_id=query_id, + ) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert ( + backward.query( + """ +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""" + ) + == TSV([["backward", "false"], ["current", "false"]]) + ) + + # Should be disabled everywhere + analyzer_enabled = backward.query( + f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""" + ) + + assert TSV(analyzer_enabled) == TSV("0") diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index 758dda655da..fc03a77030e 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -130,10 +130,13 @@ def test_string_functions(start_cluster): functions = map(lambda x: x.strip(), functions) excludes = [ + # The argument of this function is not a seed, but an arbitrary expression needed for bypassing common subexpression elimination. "rand", "rand64", "randConstant", + "randCanonical", "generateUUIDv4", + "generateULID", # Syntax error otherwise "position", "substring", @@ -153,6 +156,18 @@ def test_string_functions(start_cluster): "tryBase64Decode", # Removed in 23.9 "meiliMatch", + # These functions require more than one argument. + "parseDateTimeInJodaSyntaxOrZero", + "parseDateTimeInJodaSyntaxOrNull", + "parseDateTimeOrNull", + "parseDateTimeOrZero", + "parseDateTime", + # The argument is effectively a disk name (and we don't have one with name foo) + "filesystemUnreserved", + "filesystemCapacity", + "filesystemAvailable", + # Exclude it for now. Looks like the result depends on the build type. + "farmHash64", ] functions = filter(lambda x: x not in excludes, functions) @@ -205,6 +220,9 @@ def test_string_functions(start_cluster): # Function X takes exactly one parameter: # The function 'X' can only be used as a window function "BAD_ARGUMENTS", + # String foo is obviously not a valid IP address. 
+ "CANNOT_PARSE_IPV4", + "CANNOT_PARSE_IPV6", ] if any(map(lambda x: x in error_message, allowed_errors)): logging.info("Skipping %s", function) diff --git a/tests/integration/test_config_substitutions/configs/000-server_overrides.xml b/tests/integration/test_config_substitutions/configs/000-server_overrides.xml new file mode 100644 index 00000000000..9335f663d68 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/000-server_overrides.xml @@ -0,0 +1,3 @@ + + 10000 + diff --git a/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/000-users_with_env_subst.xml similarity index 100% rename from tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml rename to tests/integration/test_config_substitutions/configs/000-users_with_env_subst.xml diff --git a/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml new file mode 100644 index 00000000000..ea91f066a21 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index faceab6fbcd..124dbcaedf7 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -39,9 +39,13 @@ node6 = cluster.add_instance( node7 = cluster.add_instance( "node7", user_configs=[ - "configs/000-config_with_env_subst.xml", + "configs/000-users_with_env_subst.xml", "configs/010-env_subst_override.xml", ], + main_configs=[ + "configs/000-server_overrides.xml", + "configs/010-server_with_env_subst.xml", + ], env_variables={ # overridden with 424242 "MAX_QUERY_SIZE": "121212", @@ -126,9 +130,9 @@ def test_config(start_cluster): ) -def test_config_invalid_overrides(start_cluster): +def test_config_from_env_overrides(start_cluster): node7.replace_config( - "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + "/etc/clickhouse-server/users.d/000-users_with_env_subst.xml", """ @@ -156,7 +160,7 @@ def test_config_invalid_overrides(start_cluster): ): node7.query("SYSTEM RELOAD CONFIG") node7.replace_config( - "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + "/etc/clickhouse-server/users.d/000-users_with_env_subst.xml", """ @@ -181,6 +185,24 @@ def test_config_invalid_overrides(start_cluster): node7.query("SYSTEM RELOAD CONFIG") +def test_config_merge_from_env_overrides(start_cluster): + assert ( + node7.query( + "SELECT value FROM system.server_settings WHERE name='max_thread_pool_size'" + ) + == "10000\n" + ) + node7.replace_config( + "/etc/clickhouse-server/config.d/010-server_with_env_subst.xml", + """ + + 9000 + +""", + ) + node7.query("SYSTEM RELOAD CONFIG") + + def test_include_config(start_cluster): # assert node4.query("select 1") diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 50d7be4d11e..7ecb2cda257 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION +from helpers.cluster import ClickHouseCluster cluster = 
ClickHouseCluster(__file__) @@ -27,9 +27,6 @@ def make_instance(name, *args, **kwargs): ) -# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" - # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -38,14 +35,6 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) -backward = make_instance( - "backward", - main_configs=["configs/remote_servers_backward.xml"], - image="clickhouse/clickhouse-server", - # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 - tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, - with_installed_binary=True, -) users = pytest.mark.parametrize( "user,password", @@ -427,28 +416,6 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) -@users -def test_user_secure_cluster_with_backward(user, password): - id_ = "with-backward-query-dist_secure-" + user - n1.query( - f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password - ) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - -@users -def test_user_secure_cluster_from_backward(user, password): - id_ = "from-backward-query-dist_secure-" + user - backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - assert n1.contains_in_log( - "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." - ) - - def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 851da99acf3..328ba3bc05c 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -39,6 +39,7 @@ node = cluster.add_instance( "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="") }, ipv6_address=IPV6_ADDRESS, + stay_alive=True, ) main_channel = None @@ -369,47 +370,33 @@ def test_progress(): "SELECT number, sleep(0.31) FROM numbers(8) SETTINGS max_block_size=2, interactive_delay=100000", stream_output=True, ) - results = list(results) - for result in results: - result.time_zone = "" - result.query_id = "" - # print(results) - # Note: We can't convert those messages to string like `results = str(results)` and then compare it as a string - # because str() can serialize a protobuf message with any order of fields. 
- expected_results = [ - clickhouse_grpc_pb2.Result( - output_format="TabSeparated", - progress=clickhouse_grpc_pb2.Progress( - read_rows=2, read_bytes=16, total_rows_to_read=8 - ), - ), - clickhouse_grpc_pb2.Result(output=b"0\t0\n1\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"2\t0\n3\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"4\t0\n5\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"6\t0\n7\t0\n"), - clickhouse_grpc_pb2.Result( - stats=clickhouse_grpc_pb2.Stats( - rows=8, - blocks=4, - allocated_bytes=1092, - ) - ), + # Note: We can't compare results using a statement like `assert results == expected_results` + # because `results` can come in slightly different order. + # So we compare `outputs` and `progresses` separately and not `results` as a whole. + + outputs = [i.output for i in results if i.output] + progresses = [i.progress for i in results if i.HasField("progress")] + + # print(outputs) + # print(progresses) + + expected_outputs = [ + b"0\t0\n1\t0\n", + b"2\t0\n3\t0\n", + b"4\t0\n5\t0\n", + b"6\t0\n7\t0\n", ] - # Stats data can be returned, which broke the test - results = [i for i in results if not isinstance(i, clickhouse_grpc_pb2.Stats)] + expected_progresses = [ + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16, total_rows_to_read=8), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + ] - assert results == expected_results + assert outputs == expected_outputs + assert progresses == expected_progresses def test_session_settings(): @@ -763,3 +750,9 @@ def test_opentelemetry_context_propagation(): ) == "SELECT 1\tsome custom state\n" ) + + +def test_restart(): + assert query("SELECT 1") == "1\n" + node.restart_clickhouse() + assert query("SELECT 2") == "2\n" diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index fe3acd9a0cf..0eda165b1d2 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -35,7 +35,7 @@ def get_latest_mem_limit(): ).strip() ) return mem_limit - except Exception as e: + except Exception: time.sleep(1) raise Exception("Cannot get memory limit") @@ -51,3 +51,29 @@ def test_observe_memory_limit(started_cluster): if new_max_mem > original_max_mem: return raise Exception("the memory limit does not increase as expected") + + +def test_memory_usage_doesnt_include_page_cache_size(started_cluster): + try: + # populate page cache with 4GB of data; it might be killed by OOM killer but it is fine + node1.exec_in_container( + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=4K"] + ) + except Exception: + pass + + observer_refresh_period = int( + node1.query( + "select value from system.server_settings where name = 'cgroups_memory_usage_observer_wait_time'" + ).strip() + ) + time.sleep(observer_refresh_period + 1) + + max_mem_usage_from_cgroup = node1.query( + """ + SELECT max(toUInt64(replaceRegexpAll(message, 'Read current memory usage (\\d+) bytes.*', '\\1'))) AS max_mem + FROM system.text_log + WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory 
usage%bytes%' + """ + ).strip() + assert int(max_mem_usage_from_cgroup) < 2 * 2**30 diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 06cbe70f7c6..0d0d7a0afb1 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -40,6 +40,16 @@ create_table_sql_template = """ PRIMARY KEY (`id`)) ENGINE=InnoDB; """ +create_table_sql_nullable_template = """ + CREATE TABLE `clickhouse`.`{}` ( + `id` integer not null, + `col1` integer, + `col2` decimal(15,10), + `col3` varchar(32), + `col4` datetime + ) + """ + def skip_test_msan(instance): if instance.is_built_with_memory_sanitizer(): @@ -77,6 +87,11 @@ def create_mysql_db(conn, name): cursor.execute("CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(name)) +def create_mysql_nullable_table(conn, table_name): + with conn.cursor() as cursor: + cursor.execute(create_table_sql_nullable_template.format(table_name)) + + def create_mysql_table(conn, table_name): with conn.cursor() as cursor: cursor.execute(create_table_sql_template.format(table_name)) @@ -192,6 +207,46 @@ def started_cluster(): cluster.shutdown() +def test_mysql_odbc_select_nullable(started_cluster): + skip_test_msan(node1) + mysql_setup = node1.odbc_drivers["MySQL"] + + table_name = "test_insert_nullable_select" + conn = get_mysql_conn() + create_mysql_nullable_table(conn, table_name) + with conn.cursor() as cursor: + cursor.execute( + "INSERT INTO clickhouse.{} VALUES(1, 1, 1.23456, 'data1', '2010-01-01 00:00:00');".format( + table_name + ) + ) + cursor.execute( + "INSERT INTO clickhouse.{} VALUES(2, NULL, NULL, NULL, NULL);".format( + table_name + ) + ) + conn.commit() + + node1.query( + """ + CREATE TABLE {}(id UInt32, col1 Nullable(UInt32), col2 Nullable(Decimal(15, 10)), col3 Nullable(String), col4 Nullable(DateTime)) ENGINE = ODBC('DSN={}', 'clickhouse', '{}'); + """.format( + table_name, mysql_setup["DSN"], table_name + ) + ) + + assert ( + node1.query( + "SELECT id, col1, col2, col3, col4 from {} order by id asc".format( + table_name + ) + ) + == "1\t1\t1.23456\tdata1\t2010-01-01 00:00:00\n2\t\\N\t\\N\t\\N\t\\N\n" + ) + drop_mysql_table(conn, table_name) + conn.close() + + def test_mysql_simple_select_works(started_cluster): skip_test_msan(node1) diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py index 133991fed7a..a2fa77e8dc9 100755 --- a/tests/integration/test_profile_max_sessions_for_user/test.py +++ b/tests/integration/test_profile_max_sessions_for_user/test.py @@ -7,7 +7,7 @@ import pytest import sys import threading -from helpers.cluster import ClickHouseCluster, run_and_check +from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_logs_contain_with_retry from helpers.uclient import client, prompt @@ -51,7 +51,7 @@ instance = cluster.add_instance( def get_query(name, id): - return f"SElECT '{name}', {id}, number from system.numbers" + return f"SELECT '{name}', {id}, COUNT(*) from system.numbers" def grpc_get_url(): @@ -90,7 +90,7 @@ def threaded_run_test(sessions): if len(sessions) > MAX_SESSIONS_FOR_USER: # High retry amount to avoid flakiness in ASAN (+Analyzer) tests assert_logs_contain_with_retry( - instance, "overflown session count", retry_count=60 + instance, "overflown session count", retry_count=120 ) instance.query(f"KILL QUERY WHERE user='{TEST_USER}' SYNC") diff --git 
a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 20b004a7605..6fbe7634642 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -809,7 +809,7 @@ def test_read_subcolumn_time(cluster): def test_read_from_not_existing_container(cluster): node = cluster.instances["node"] query = ( - f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont_not_exists', 'test_table.csv', " + f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont-not-exists', 'test_table.csv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" ) expected_err_msg = "container does not exist" diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index b93e560d5b9..bf3c28c5429 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -1780,6 +1780,7 @@ def test_commit_on_limit(started_cluster): if "test_999999.csv" in get_processed_files(): break time.sleep(1) + assert "test_999999.csv" in get_processed_files() assert 1 == int( diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index ffbf4df4ba7..13146f2eab0 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage +# Tags: zookeeper, no-object-storage # Because REPLACE PARTITION does not force immediate removal of replaced data parts from the local filesystem # (it tries to do this as quickly as possible, but it is still performed asynchronously in a separate thread) diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index c54ca0b084e..ae9b6bb7b2c 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage, no-asan +-- Tags: long, no-object-storage, no-asan SET joined_subquery_requires_alias = 0; diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index af746c43da9..5a4fd901f8d 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage, no-tsan +# Tags: long, no-object-storage, no-tsan # no-s3 because it reads the FileOpen metric set -e diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index 4613576cf4e..009fc0bbb9f 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage, no-random-merge-tree-settings +-- Tags: long, no-object-storage, no-random-merge-tree-settings SET output_format_pretty_row_numbers = 0; DROP TABLE IF EXISTS check_system_tables; 
diff --git a/tests/queries/0_stateless/00763_lock_buffer_long.sh b/tests/queries/0_stateless/00763_lock_buffer_long.sh index 046e4efaa85..2006d43cdd2 100755 --- a/tests/queries/0_stateless/00763_lock_buffer_long.sh +++ b/tests/queries/0_stateless/00763_lock_buffer_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage, no-msan, no-asan, no-tsan, no-debug +# Tags: long, no-object-storage, no-msan, no-asan, no-tsan, no-debug # Some kind of stress test, it doesn't make sense to test in a non-release build set -e diff --git a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh index 16ebf2e6e54..c3ad29d33a1 100755 --- a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh +++ b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh @@ -18,7 +18,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_with_empty_part ENGINE = MergeTree() ORDER BY id PARTITION BY id -SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0 +SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0, min_bytes_for_wide_part=0, min_bytes_for_full_part_storage = 0 " diff --git a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql index 813ebf3f5a7..4d1cd54306c 100644 --- a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql +++ b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-s3-storage +-- Tags: no-parallel, no-object-storage -- With s3 policy TTL TO DISK 'default' doesn't work (because we have no default, only 's3') drop table if exists ttl; diff --git a/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql b/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql index 3a05e4507a2..166f44df2a7 100644 --- a/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql +++ b/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage -- Output slightly different plan drop table if exists t; diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.sql b/tests/queries/0_stateless/01158_zookeeper_log_long.sql index 55d4162fc48..804cdf48fb6 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.sql +++ b/tests/queries/0_stateless/01158_zookeeper_log_long.sql @@ -29,14 +29,20 @@ select 'parts'; select type, has_watch, op_num, replace(path, toString(serverUUID()), ''), is_ephemeral, is_sequential, if(startsWith(path, '/clickhouse/sessions'), 1, version), requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log -where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0') +where (session_id, xid) in ( + select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0' + and (query_id='' or query_id in (select query_id from system.query_log where current_database=currentDatabase() and event_date>=yesterday())) +) order by xid, type, request_idx; select 'blocks'; select type, has_watch, op_num, path, 
is_ephemeral, is_sequential, version, requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log -where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || '/rmt/blocks/%' and op_num not in (1, 12, 500)) +where (session_id, xid) in ( + select session_id, xid from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || '/rmt/blocks/%' and op_num not in (1, 12, 500) + and (query_id='' or query_id in (select query_id from system.query_log where current_database=currentDatabase() and event_date>=yesterday())) +) order by xid, type, request_idx; drop table rmt sync; diff --git a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql index 5019abc38ab..f2d071961ee 100644 --- a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql +++ b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql @@ -1,4 +1,4 @@ --- Tags: no-debug, no-parallel, long, no-s3-storage, no-random-settings, no-random-merge-tree-settings +-- Tags: no-debug, no-parallel, long, no-object-storage, no-random-settings, no-random-merge-tree-settings SET optimize_trivial_insert_select = 1; DROP TABLE IF EXISTS table_with_single_pk; diff --git a/tests/queries/0_stateless/01221_system_settings.sql b/tests/queries/0_stateless/01221_system_settings.sql index fcffd6c45fe..da0204b37bd 100644 --- a/tests/queries/0_stateless/01221_system_settings.sql +++ b/tests/queries/0_stateless/01221_system_settings.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage select * from system.settings where name = 'send_timeout'; select * from system.merge_tree_settings order by length(description) limit 1; diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 047b1cc3ee7..9d74474c1a4 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -1,5 +1,5 @@ --- Tags: no-s3-storage, no-parallel, no-fasttest --- no-s3-storage: s3 has 20 more threads +-- Tags: no-object-storage, no-parallel, no-fasttest +-- no-object-storage: s3 has 20 more threads -- no-parallel: it checks the number of threads, which can be lowered in presence of other queries -- avoid settings randomization by clickhouse-test diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index e83e49dffef..33b8f413fd5 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-s3-storage, no-msan +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-object-storage, no-msan # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted diff --git a/tests/queries/0_stateless/01293_optimize_final_force.sh b/tests/queries/0_stateless/01293_optimize_final_force.sh index d3d3d3e1ac5..e838af8af9b 100755 --- a/tests/queries/0_stateless/01293_optimize_final_force.sh +++ 
b/tests/queries/0_stateless/01293_optimize_final_force.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, long, no-debug, no-s3-storage +# Tags: no-fasttest, long, no-debug, no-object-storage # This test is too slow with S3 storage and debug modes. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 97148dc268e..2e27c2f7728 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage-with-slow-build +# Tags: long, no-object-storage-with-slow-build CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql index 614629351ef..15c9ec16700 100644 --- a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql +++ b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_01343; CREATE TABLE test_01343 (x String) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_01343 VALUES ('Hello, world'); diff --git a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql index 2e5ec563641..76cb535dcb7 100644 --- a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql +++ b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_01344; CREATE TABLE test_01344 (x String, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_01344 VALUES ('Hello, world'); diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index 8d4e3cb779b..d6eec2f84a1 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-settings +-- Tags: no-object-storage, no-random-settings SET use_uncompressed_cache = 0; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh index 5a30f9e0f08..f74f6755e59 100755 --- a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh +++ b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage +# Tags: no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh index c3c87eeaf8b..6098c826e32 100755 --- a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh +++ b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-s3-storage +# Tags: race, zookeeper, no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git 
a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index 49ef9d8b79f..e53f4476ec6 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -1,4 +1,4 @@ --- Tags: long, replica, no-replicated-database, no-parallel, no-s3-storage +-- Tags: long, replica, no-replicated-database, no-parallel, no-object-storage -- Tag no-replicated-database: Fails due to additional replicas or shards -- Tag no-parallel: static zk path diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index 1a6f0ec395e..80e9fc7e2fb 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings -- no-s3 because read FileOpen metric DROP TABLE IF EXISTS nested; diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql index 95b46c69e83..b5ece08196e 100644 --- a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings DROP TABLE IF EXISTS data_01551; diff --git a/tests/queries/0_stateless/01592_window_functions.reference b/tests/queries/0_stateless/01592_window_functions.reference index ec957dd7a02..06ec67ee82d 100644 --- a/tests/queries/0_stateless/01592_window_functions.reference +++ b/tests/queries/0_stateless/01592_window_functions.reference @@ -79,3 +79,16 @@ iPhone 900 Smartphone 500 500 Kindle Fire 150 Tablet 150 350 Samsung Galaxy Tab 200 Tablet 175 350 iPad 700 Tablet 350 350 +---- Q8 ---- +Lenovo Thinkpad Laptop 700 1 0 +Sony VAIO Laptop 700 1 0 +Dell Vostro Laptop 800 3 0.6666666666666666 +HP Elite Laptop 1200 4 1 +Microsoft Lumia Smartphone 200 1 0 +HTC One Smartphone 400 2 0.3333333333333333 +Nexus Smartphone 500 3 0.6666666666666666 +iPhone Smartphone 900 4 1 +Kindle Fire Tablet 150 1 0 +Samsung Galaxy Tab Tablet 200 2 0.5 +iPad Tablet 700 3 1 +Others Unknown 200 1 0 diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index f0d173b1f20..a660fcca7b2 100644 --- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -101,5 +101,26 @@ SELECT FROM products INNER JOIN product_groups USING (group_id)) t order by group_name, product_name, price; +select '---- Q8 ----'; +INSERT INTO product_groups VALUES (4, 'Unknown'); +INSERT INTO products (product_id, product_name, group_id, price) VALUES (12, 'Others', 4, 200); + +SELECT * +FROM +( + SELECT + product_name, + group_name, + price, + rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank, + percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent + FROM products + INNER JOIN product_groups USING (group_id) +) AS t +ORDER BY + group_name ASC, + price ASC, + product_name ASC; + drop table product_groups; drop table products; diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.reference
b/tests/queries/0_stateless/01600_parts_states_metrics_long.reference new file mode 100644 index 00000000000..98fb6a68656 --- /dev/null +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh new file mode 100755 index 00000000000..2e47034e528 --- /dev/null +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# NOTE: database = $CLICKHOUSE_DATABASE is unwanted +verify_sql="SELECT + (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) + = (SELECT sum(active), sum(NOT active) FROM + (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts UNION ALL SELECT 1 FROM system.dropped_tables_parts))" + +# The query is not atomic: it can compare states between system.parts and system.metrics from different points in time. +# So, there is an inherent race condition, but it should converge to the expected result eventually. +# In case of test failure, this code will loop forever and time out. +verify() +{ + while true + do + result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" ) + [ "$result" = "1" ] && break + sleep 0.1 + done + echo 1 +} + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table(data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" + +$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')" +verify + +$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')" +verify + +$CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL" +verify + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table" +verify diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 187ff5c37e1..9b96ce3e586 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,6 +1,6 @@ --- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-s3-storage +-- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-object-storage -- no-tsan: too slow --- no-s3-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher +-- no-object-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher SET use_uncompressed_cache = 0; SET allow_prefetched_read_pool_for_remote_filesystem=0; diff --git a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql index dfc761e1764..f7622bcf98f 100644 --- a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage drop table if exists data_01643; diff --git a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql index
54c30fa2b1a..992cc687c88 100644 --- a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-s3-storage +-- Tags: no-parallel, no-object-storage -- no-parallel -- for flaky check and to avoid "Removing leftovers from table" (for other tables) -- Temporarily skip warning 'table was created by another server at the same moment, will retry' diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql index be0f7e8b710..921d28e6399 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-replicated-database, no-parallel, no-s3-storage +-- Tags: zookeeper, no-replicated-database, no-parallel, no-object-storage drop table if exists x; diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index 3782a7d3ad6..c38fc505fa8 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage +# Tags: long, no-object-storage # Because parallel parts removal disabled for s3 storage # NOTE: this done as not .sql since we need to Ordinary database diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference deleted file mode 100644 index a1cd31e2dc9..00000000000 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference +++ /dev/null @@ -1,47 +0,0 @@ -0 0 1 -0 1 0 -SELECT - id IS NULL, - `n.null`, - NOT `n.null` -FROM t_func_to_subcolumns -3 0 1 0 -0 1 0 \N -SELECT - `arr.size0`, - `arr.size0` = 0, - `arr.size0` != 0, - empty(n) -FROM t_func_to_subcolumns -['foo','bar'] [1,2] -[] [] -SELECT - `m.keys`, - `m.values` -FROM t_func_to_subcolumns -1 -SELECT sum(NOT `n.null`) -FROM t_func_to_subcolumns -2 -SELECT count(id) -FROM t_func_to_subcolumns -1 0 0 -2 1 0 -3 0 0 -SELECT - id, - `n.null`, - right.n IS NULL -FROM t_func_to_subcolumns AS left -ALL FULL OUTER JOIN -( - SELECT - 1 AS id, - \'qqq\' AS n - UNION ALL - SELECT - 3 AS id, - \'www\' -) AS right USING (id) -0 10 -0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference new file mode 100644 index 00000000000..32bacfba5ea --- /dev/null +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -0,0 +1,181 @@ +0 0 1 +0 1 0 +QUERY id: 0 + PROJECTION COLUMNS + isNull(id) UInt8 + isNull(n) UInt8 + isNotNull(n) UInt8 + PROJECTION + LIST id: 1, nodes: 3 + CONSTANT id: 2, constant_value: UInt64_0, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 3, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: id, result_type: UInt64, source_id: 6 + COLUMN id: 7, column_name: n.null, result_type: UInt8, source_id: 6 + FUNCTION id: 8, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 1 + COLUMN id: 10, column_name: n.null, result_type: UInt8, source_id: 6 + JOIN TREE + TABLE id: 6, alias: __table1, 
table_name: default.t_func_to_subcolumns + +SELECT + _CAST(0, \'UInt8\') AS `isNull(id)`, + __table1.`n.null` AS `isNull(n)`, + NOT __table1.`n.null` AS `isNotNull(n)` +FROM default.t_func_to_subcolumns AS __table1 +3 0 1 0 +0 1 0 \N +QUERY id: 0 + PROJECTION COLUMNS + length(arr) UInt64 + empty(arr) UInt8 + notEmpty(arr) UInt8 + empty(n) Nullable(UInt8) + PROJECTION + LIST id: 1, nodes: 4 + COLUMN id: 2, column_name: arr.size0, result_type: UInt64, source_id: 3 + FUNCTION id: 4, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: arr.size0, result_type: UInt64, source_id: 3 + CONSTANT id: 7, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 8, function_name: notEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: arr.size0, result_type: UInt64, source_id: 3 + CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 12, function_name: empty, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 13, nodes: 1 + COLUMN id: 14, column_name: n, result_type: Nullable(String), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + +SELECT + __table1.`arr.size0` AS `length(arr)`, + __table1.`arr.size0` = 0 AS `empty(arr)`, + __table1.`arr.size0` != 0 AS `notEmpty(arr)`, + empty(__table1.n) AS `empty(n)` +FROM default.t_func_to_subcolumns AS __table1 +['foo','bar'] [1,2] +[] [] +QUERY id: 0 + PROJECTION COLUMNS + mapKeys(m) Array(String) + mapValues(m) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: m.keys, result_type: Array(String), source_id: 3 + COLUMN id: 4, column_name: m.values, result_type: Array(UInt64), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + +SELECT + __table1.`m.keys` AS `mapKeys(m)`, + __table1.`m.values` AS `mapValues(m)` +FROM default.t_func_to_subcolumns AS __table1 +1 +QUERY id: 0 + PROJECTION COLUMNS + count(n) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.t_func_to_subcolumns + +SELECT sum(NOT __table1.`n.null`) AS `count(n)` +FROM default.t_func_to_subcolumns AS __table1 +2 +QUERY id: 0 + PROJECTION COLUMNS + count(id) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + JOIN TREE + TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + +SELECT count(__table1.id) AS `count(id)` +FROM default.t_func_to_subcolumns AS __table1 +1 0 0 +2 1 0 +3 0 0 +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + isNull(n) UInt8 + isNull(right.n) UInt8 + PROJECTION + LIST id: 1, nodes: 3 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: n.null, result_type: UInt8, source_id: 3 + CONSTANT id: 5, constant_value: UInt64_0, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 6, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + 
LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: n, result_type: String, source_id: 9 + JOIN TREE + JOIN id: 10, strictness: ALL, kind: FULL + LEFT TABLE EXPRESSION + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + RIGHT TABLE EXPRESSION + UNION id: 9, alias: __table2, is_subquery: 1, union_mode: UNION_ALL + QUERIES + LIST id: 11, nodes: 2 + QUERY id: 12, alias: __table3 + PROJECTION COLUMNS + id UInt8 + PROJECTION + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + TABLE id: 15, alias: __table4, table_name: system.one + QUERY id: 16, alias: __table5 + PROJECTION COLUMNS + id UInt8 + PROJECTION + LIST id: 17, nodes: 1 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE id: 19, alias: __table6, table_name: system.one + JOIN EXPRESSION + LIST id: 20, nodes: 1 + COLUMN id: 21, column_name: id, result_type: UInt64, source_id: 10 + EXPRESSION + LIST id: 22, nodes: 2 + COLUMN id: 23, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 24, column_name: id, result_type: UInt8, source_id: 9 + +SELECT + __table1.id AS id, + __table1.`n.null` AS `isNull(n)`, + _CAST(0, \'UInt8\') AS `isNull(right.n)` +FROM default.t_func_to_subcolumns AS __table1 +ALL FULL OUTER JOIN +( + ( + SELECT 1 AS id + FROM system.one AS __table4 + ) + UNION ALL + ( + SELECT 3 AS id + FROM system.one AS __table6 + ) +) AS __table2 USING (id) +0 10 +0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql similarity index 62% rename from tests/queries/0_stateless/01872_functions_to_subcolumns.sql rename to tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql index 45f83bf20e5..b544f6829cf 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql @@ -1,5 +1,6 @@ DROP TABLE IF EXISTS t_func_to_subcolumns; +SET allow_experimental_analyzer = 1; SET optimize_functions_to_subcolumns = 1; CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) @@ -8,24 +9,24 @@ ENGINE = MergeTree ORDER BY tuple(); INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; SELECT count(n) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT count(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; SELECT count(id) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT count(id) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 
1 SELECT count(id) FROM t_func_to_subcolumns; SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); -EXPLAIN SYNTAX SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); DROP TABLE t_func_to_subcolumns; diff --git a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference index e6a2b2b6aaf..ab832478da0 100644 --- a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference +++ b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference @@ -41,8 +41,6 @@ String ConstConst 38 Hello 00000001 39 Hello 00000000 40 Hello -41 Hello -42 Hello FixedString ConstConst 1 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 @@ -92,10 +90,8 @@ FixedString ConstConst 78 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000001 79 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 80 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -81 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 String VectorVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 0010010000110010101101100011011000110111 7 Hello 0000000010010000110010101101100011011000 @@ -112,8 +108,6 @@ String VectorVector 33 Hello 00100100 39 Hello 00000000 40 Hello -41 Hello -42 Hello 7 Hel 000000001001000011001010 8 Hel 0100100001100101 9 Hel 0010010000110010 @@ -125,7 +119,6 @@ String VectorVector 9 Hel 0010010000110010 FixedString VectorVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 00100100001100101011011000110110001101111000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 @@ -142,8 +135,6 @@ FixedString VectorVector 33 Hello\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110001101100011011110000000 39 Hello\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110001101100011011110 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000110110001101111 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100011011000110111 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110001101100011011 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 00000000001001000011001010110110000000000000000000000000000000000000000000000000 @@ -171,9 +162,6 @@ String VectorConst 7 Hello 0000000010010000110010101101100011011000 7 Hello 0000000010010000110010101101100011011000 7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 -7 Hello 
0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 7 Hel 000000001001000011001010 7 Hel 000000001001000011001010 7 Hel 000000001001000011001010 @@ -193,9 +181,6 @@ String VectorConst 8 Hello 01001000011001010110110001101100 8 Hello 01001000011001010110110001101100 8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 8 Hel 0100100001100101 8 Hel 0100100001100101 8 Hel 0100100001100101 @@ -217,9 +202,6 @@ FixedString VectorConst 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 @@ -239,15 +221,11 @@ FixedString VectorConst 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 String ConstVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 0010010000110010101101100011011000110111 7 Hello 0000000010010000110010101101100011011000 @@ -264,12 +242,9 @@ String ConstVector 33 Hello 00100100 39 Hello 00000000 40 Hello -41 Hello -42 Hello 7 Hello 0000000010010000110010101101100011011000 8 Hello 01001000011001010110110001101100 9 Hello 00100100001100101011011000110110 --1 Hel 0 Hel 010010000110010101101100 1 Hel 001001000011001010110110 7 Hel 000000001001000011001010 @@ -280,20 +255,11 @@ String ConstVector 17 Hel 00100100 23 Hel 00000000 24 Hel -25 Hel -31 Hel -32 Hel -33 Hel -39 Hel -40 Hel -41 Hel -42 Hel 7 Hel 000000001001000011001010 8 Hel 0100100001100101 9 Hel 0010010000110010 FixedString ConstVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 00100100001100101011011000110110001101111000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 
00000000100100001100101011011000110110001101111000000000000000000000000000000000 @@ -310,12 +276,9 @@ FixedString ConstVector 33 Hello\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110001101100011011110000000 39 Hello\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110001101100011011110 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000110110001101111 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100011011000110111 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110001101100011011 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 9 Hello\0\0\0\0\0 00000000001001000011001010110110001101100011011110000000000000000000000000000000 --1 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hel\0\0\0\0\0\0\0 01001000011001010110110000000000000000000000000000000000000000000000000000000000 1 Hel\0\0\0\0\0\0\0 00100100001100101011011000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 @@ -332,8 +295,6 @@ FixedString ConstVector 33 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110000000000000000000000000 39 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110000000000000000000 40 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000000000000000000 -41 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100000000000000000 -42 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 00000000001001000011001010110110000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql index 0ee04e408ba..40fccbc89e6 100644 --- a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql +++ b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql @@ -41,8 +41,6 @@ SELECT 37,'Hello',bin(bitShiftRight('Hello', 37)); SELECT 38,'Hello',bin(bitShiftRight('Hello', 38)); SELECT 39,'Hello',bin(bitShiftRight('Hello', 39)); SELECT 40,'Hello',bin(bitShiftRight('Hello', 40)); -SELECT 41,'Hello',bin(bitShiftRight('Hello', 41)); -SELECT 42,'Hello',bin(bitShiftRight('Hello', 42)); SELECT 'FixedString ConstConst'; SELECT bin(toFixedString('Hello', 10)) == bin(bitShiftRight(toFixedString('Hello', 10), 0)); @@ -93,40 +91,39 @@ SELECT 77,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 1 SELECT 78,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 78)); SELECT 79,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 79)); SELECT 80,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 80)); -SELECT 81,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 81)); DROP TABLE IF EXISTS test_bit_shift_right_string_integer; CREATE TABLE 
test_bit_shift_right_string_integer (str String, fixedStr FixedString(10), id Int64) engine=Log; -INSERT INTO test_bit_shift_right_string_integer VALUES('Hello','Hello',-1)('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hello','Hello',41),('Hello','Hello',42),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); +INSERT INTO test_bit_shift_right_string_integer VALUES('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String VectorVector'; SELECT id as shift_right_bit,str as arg,bin(bitShiftRight(str, id)) as string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,str as arg,bin(bitShiftRight(str, id)) as string_res FROM test_bit_shift_right_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftRight('Hello', 42)); +SELECT bin(bitShiftRight('Hello', 40)); SELECT 'FixedString VectorVector'; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, id)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, id)) as fixed_string_res FROM test_bit_shift_right_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String VectorConst'; SELECT 7 as shift_right_bit,str as arg,bin(bitShiftRight(str, 7)) as string_res FROM test_bit_shift_right_string_integer; SELECT 8 as shift_right_bit,str as arg,bin(bitShiftRight(str, 8)) as string_res FROM test_bit_shift_right_string_integer; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'FixedString VectorConst'; SELECT 7 as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, 7)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT 8 as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, 8)) as fixed_string_res FROM test_bit_shift_right_string_integer; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String ConstVector'; SELECT id as shift_right_bit,'Hello' as arg,bin(bitShiftRight('Hello', id)) as string_res FROM test_bit_shift_right_string_integer; -SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftRight('Hel', id)) as string_res FROM test_bit_shift_right_string_integer; +SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftRight('Hel', id)) as string_res FROM test_bit_shift_right_string_integer WHERE id <= 8 * 3; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'FixedString ConstVector'; SELECT id as shift_right_bit,toFixedString('Hello', 
10) as arg,bin(bitShiftRight(toFixedString('Hello', 10), id)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,toFixedString('Hel', 10) as arg,bin(bitShiftRight(toFixedString('Hel', 10), id)) as fixed_string_res FROM test_bit_shift_right_string_integer; diff --git a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference index ff5a09c0d48..a20c44bbb9a 100644 --- a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference +++ b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference @@ -41,8 +41,6 @@ String ConstConst 38 Hello 00010010000110010101101100011011000110111100000000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello FixedString ConstConst 1 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 @@ -92,10 +90,8 @@ FixedString ConstConst 78 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 79 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 80 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -81 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 String VectorVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 000000001001000011001010110110001101100011011110 7 Hello 001001000011001010110110001101100011011110000000 @@ -112,8 +108,6 @@ String VectorVector 33 Hello 00000000100100001100101011011000110110001101111000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello 7 Hel 00100100001100101011011000000000 8 Hel 01001000011001010110110000000000 9 Hel 0000000010010000110010101101100000000000 @@ -125,7 +119,6 @@ String VectorVector 9 Hel 0000000010010000110010101101100000000000 FixedString VectorVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 10010000110010101101100011011000110111100000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 @@ -142,8 +135,6 @@ FixedString VectorVector 33 Hello\0\0\0\0\0 11011110000000000000000000000000000000000000000000000000000000000000000000000000 39 Hello\0\0\0\0\0 10000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 11001010110110000000000000000000000000000000000000000000000000000000000000000000 @@ -171,9 +162,6 @@ String VectorConst 7 Hello 001001000011001010110110001101100011011110000000 7 Hello 
001001000011001010110110001101100011011110000000 7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 7 Hel 00100100001100101011011000000000 7 Hel 00100100001100101011011000000000 7 Hel 00100100001100101011011000000000 @@ -193,9 +181,6 @@ String VectorConst 8 Hello 010010000110010101101100011011000110111100000000 8 Hello 010010000110010101101100011011000110111100000000 8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 8 Hel 01001000011001010110110000000000 8 Hel 01001000011001010110110000000000 8 Hel 01001000011001010110110000000000 @@ -217,9 +202,6 @@ FixedString VectorConst 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 @@ -239,15 +221,11 @@ FixedString VectorConst 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 String ConstVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 000000001001000011001010110110001101100011011110 7 Hello 001001000011001010110110001101100011011110000000 @@ -264,12 +242,9 @@ String ConstVector 33 Hello 00000000100100001100101011011000110110001101111000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello 7 Hello 001001000011001010110110001101100011011110000000 8 Hello 010010000110010101101100011011000110111100000000 9 Hello 00000000100100001100101011011000110110001101111000000000 --1 Hel 0 Hel 010010000110010101101100 1 Hel 00000000100100001100101011011000 7 Hel 00100100001100101011011000000000 @@ -280,20 +255,11 @@ String 
ConstVector 17 Hel 000000001001000011001010110110000000000000000000 23 Hel 001001000011001010110110000000000000000000000000 24 Hel -25 Hel -31 Hel -32 Hel -33 Hel -39 Hel -40 Hel -41 Hel -42 Hel 7 Hel 00100100001100101011011000000000 8 Hel 01001000011001010110110000000000 9 Hel 0000000010010000110010101101100000000000 FixedString ConstVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 10010000110010101101100011011000110111100000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 @@ -310,12 +276,9 @@ FixedString ConstVector 33 Hello\0\0\0\0\0 11011110000000000000000000000000000000000000000000000000000000000000000000000000 39 Hello\0\0\0\0\0 10000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 9 Hello\0\0\0\0\0 11001010110110001101100011011110000000000000000000000000000000000000000000000000 --1 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hel\0\0\0\0\0\0\0 01001000011001010110110000000000000000000000000000000000000000000000000000000000 1 Hel\0\0\0\0\0\0\0 10010000110010101101100000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 @@ -332,8 +295,6 @@ FixedString ConstVector 33 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 39 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 11001010110110000000000000000000000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql index 5c7a9901dae..a8e66eda281 100644 --- a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql +++ b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql @@ -41,8 +41,6 @@ SELECT 37,'Hello',bin(bitShiftLeft('Hello', 37)); SELECT 38,'Hello',bin(bitShiftLeft('Hello', 38)); SELECT 39,'Hello',bin(bitShiftLeft('Hello', 39)); SELECT 40,'Hello',bin(bitShiftLeft('Hello', 40)); -SELECT 41,'Hello',bin(bitShiftLeft('Hello', 41)); -SELECT 42,'Hello',bin(bitShiftLeft('Hello', 42)); SELECT 'FixedString ConstConst'; 
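Every row dropped from these two bit-shift tests shares one property: the shift amount is either negative or larger than the bit width of the value (8 bits per byte, so 40 for 'Hello', 24 for 'Hel', and 80 for the FixedString(10) columns). The WHERE id <= 8 * 3 filter added to the 'Hel' ConstVector queries encodes the same rule. A small sketch of the boundary, under the assumption that out-of-range shift amounts are now rejected with an error instead of silently producing empty results:

SELECT bin(bitShiftLeft('Hello', 40)); -- 40 = 8 * length('Hello'): largest accepted shift, every bit is shifted out
SELECT bin(bitShiftLeft('Hello', 41)); -- exceeds the value's bit width: expected to throw
SELECT bin(bitShiftLeft('Hello', -1)); -- negative shift: expected to throw
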
SELECT bin(toFixedString('Hello', 10)) == bin(bitShiftLeft(toFixedString('Hello', 10), 0)); @@ -93,40 +91,39 @@ SELECT 77,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10 SELECT 78,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 78)); SELECT 79,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 79)); SELECT 80,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 80)); -SELECT 81,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 81)); DROP TABLE IF EXISTS test_bit_shift_left_string_integer; CREATE TABLE test_bit_shift_left_string_integer (str String, fixedStr FixedString(10), id Int64) engine=Log; -INSERT INTO test_bit_shift_left_string_integer VALUES('Hello','Hello',-1)('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hello','Hello',41),('Hello','Hello',42),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); +INSERT INTO test_bit_shift_left_string_integer VALUES('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String VectorVector'; SELECT id as shift_right_bit,str as arg,bin(bitShiftLeft(str, id)) as string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,str as arg,bin(bitShiftLeft(str, id)) as string_res FROM test_bit_shift_left_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftLeft('Hello', 42)); +SELECT bin(bitShiftLeft('Hello', 40)); SELECT 'FixedString VectorVector'; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, id)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, id)) as fixed_string_res FROM test_bit_shift_left_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String VectorConst'; SELECT 7 as shift_right_bit,str as arg,bin(bitShiftLeft(str, 7)) as string_res FROM test_bit_shift_left_string_integer; SELECT 8 as shift_right_bit,str as arg,bin(bitShiftLeft(str, 8)) as string_res FROM test_bit_shift_left_string_integer; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'FixedString VectorConst'; SELECT 7 as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, 7)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT 8 as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, 8)) as fixed_string_res FROM test_bit_shift_left_string_integer; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String ConstVector'; SELECT id as 
shift_right_bit,'Hello' as arg,bin(bitShiftLeft('Hello', id)) as string_res FROM test_bit_shift_left_string_integer; -SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftLeft('Hel', id)) as string_res FROM test_bit_shift_left_string_integer; +SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftLeft('Hel', id)) as string_res FROM test_bit_shift_left_string_integer WHERE id <= 8 * 3; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'FixedString ConstVector'; SELECT id as shift_right_bit,toFixedString('Hello', 10) as arg,bin(bitShiftLeft(toFixedString('Hello', 10), id)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,toFixedString('Hel', 10) as arg,bin(bitShiftLeft(toFixedString('Hel', 10), id)) as fixed_string_res FROM test_bit_shift_left_string_integer; diff --git a/tests/queries/0_stateless/02115_map_contains.reference b/tests/queries/0_stateless/02115_map_contains.reference deleted file mode 100644 index 975e9876237..00000000000 --- a/tests/queries/0_stateless/02115_map_contains.reference +++ /dev/null @@ -1,4 +0,0 @@ -SELECT has(`m.keys`, \'a\') -FROM t_map_contains -1 -0 diff --git a/tests/queries/0_stateless/02115_map_contains_analyzer.reference b/tests/queries/0_stateless/02115_map_contains_analyzer.reference new file mode 100644 index 00000000000..7da5243e727 --- /dev/null +++ b/tests/queries/0_stateless/02115_map_contains_analyzer.reference @@ -0,0 +1,4 @@ +SELECT has(__table1.`m.keys`, \'a\') AS `mapContains(m, \'a\')` +FROM default.t_map_contains AS __table1 +1 +0 diff --git a/tests/queries/0_stateless/02115_map_contains.sql b/tests/queries/0_stateless/02115_map_contains_analyzer.sql similarity index 70% rename from tests/queries/0_stateless/02115_map_contains.sql rename to tests/queries/0_stateless/02115_map_contains_analyzer.sql index 3c7f21cb4f1..46e02eca4f0 100644 --- a/tests/queries/0_stateless/02115_map_contains.sql +++ b/tests/queries/0_stateless/02115_map_contains_analyzer.sql @@ -5,8 +5,9 @@ CREATE TABLE t_map_contains (m Map(String, UInt32)) ENGINE = Memory; INSERT INTO t_map_contains VALUES (map('a', 1, 'b', 2)), (map('c', 3, 'd', 4)); SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; -EXPLAIN SYNTAX SELECT mapContains(m, 'a') FROM t_map_contains; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapContains(m, 'a') FROM t_map_contains; SELECT mapContains(m, 'a') FROM t_map_contains; DROP TABLE t_map_contains; diff --git a/tests/queries/0_stateless/02116_tuple_element.reference b/tests/queries/0_stateless/02116_tuple_element.reference deleted file mode 100644 index 121b08d02f1..00000000000 --- a/tests/queries/0_stateless/02116_tuple_element.reference +++ /dev/null @@ -1,25 +0,0 @@ -1 -SELECT `t1.a` -FROM t_tuple_element -a -SELECT `t1.s` -FROM t_tuple_element -1 -SELECT `t1.a` -FROM t_tuple_element -2 -SELECT `t2.1` -FROM t_tuple_element -2 -SELECT `t2.1` -FROM t_tuple_element -1 2 -WITH (1, 2) AS t -SELECT - t.1, - t.2 -1 2 -WITH CAST(\'(1, 2)\', \'Tuple(a UInt32, b UInt32)\') AS t -SELECT - t.1, - tupleElement(t, \'b\') diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference new file mode 100644 index 00000000000..22d48ffb2f3 --- /dev/null +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference @@ -0,0 +1,25 @@ +1 +SELECT __table1.`t1.a` AS `tupleElement(t1, 1)` +FROM default.t_tuple_element AS __table1 +a +SELECT 
__table1.`t1.s` AS `tupleElement(t1, 2)` +FROM default.t_tuple_element AS __table1 +1 +SELECT __table1.`t1.a` AS `tupleElement(t1, \'a\')` +FROM default.t_tuple_element AS __table1 +2 +SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` +FROM default.t_tuple_element AS __table1 +2 +SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` +FROM default.t_tuple_element AS __table1 +1 2 +SELECT + _CAST(1, \'UInt8\') AS `tupleElement(t, 1)`, + _CAST(2, \'UInt8\') AS `tupleElement(t, 2)` +FROM system.one AS __table1 +1 2 +SELECT + _CAST(1, \'UInt32\') AS `tupleElement(t, 1)`, + _CAST(2, \'UInt32\') AS `tupleElement(t, \'b\')` +FROM system.one AS __table1 diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element_analyzer.sql similarity index 60% rename from tests/queries/0_stateless/02116_tuple_element.sql rename to tests/queries/0_stateless/02116_tuple_element_analyzer.sql index 64d9b9db331..5aeb72c9ee4 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.sql @@ -4,39 +4,40 @@ CREATE TABLE t_tuple_element(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, Stri INSERT INTO t_tuple_element VALUES ((1, 'a'), (2, 'b')); SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; SELECT t1.1 FROM t_tuple_element; -EXPLAIN SYNTAX SELECT t1.1 FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t1.1 FROM t_tuple_element; SELECT tupleElement(t1, 2) FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t1, 2) FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 2) FROM t_tuple_element; SELECT tupleElement(t1, 'a') FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 'a') FROM t_tuple_element; SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT t2.1 FROM t_tuple_element; -EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t2.1 FROM t_tuple_element; SELECT tupleElement(t2, 1) FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t2, 1) FROM t_tuple_element; SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t2, 0) FROM 
t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } DROP TABLE t_tuple_element; WITH (1, 2) AS t SELECT t.1, t.2; -EXPLAIN SYNTAX WITH (1, 2) AS t SELECT t.1, t.2; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2) AS t SELECT t.1, t.2; WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); -EXPLAIN SYNTAX WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh index 9d87542d84d..d0e61541b15 100755 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings, no-replicated-database +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings, no-replicated-database # set -x diff --git a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql index ca1ee2738c7..6d86d995143 100644 --- a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql +++ b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql @@ -1,5 +1,5 @@ --- Tags: long, no-parallel, no-s3-storage --- no-s3-storage: Avoid flakiness due to cache / buffer usage +-- Tags: long, no-parallel, no-object-storage +-- no-object-storage: Avoid flakiness due to cache / buffer usage SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require too many retries due to 100 partitions, so disable fault injections -- regression for MEMORY_LIMIT_EXCEEDED error because of deferred final part flush diff --git a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql index 8bc75040e5a..48af5ae0031 100644 --- a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql +++ b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql index ee92931ec54..b791ee18e82 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +-- Tags: no-parallel, no-fasttest, 
no-object-storage, no-random-settings -- { echo } diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index a609702f22a..40c80e04697 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +-- Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings -- { echo } diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index 57b8cec7864..8faf0a08f1f 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index 1028fba76f5..f8e7b7e7e72 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh index 7a665d81eab..fe016f5a27f 100755 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02263_lazy_mark_load.sh b/tests/queries/0_stateless/02263_lazy_mark_load.sh index 5f80d9d7f6d..f1602e47e01 100755 --- a/tests/queries/0_stateless/02263_lazy_mark_load.sh +++ b/tests/queries/0_stateless/02263_lazy_mark_load.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-parallel +# Tags: no-object-storage, no-random-settings, no-parallel set -eo pipefail CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index a2c9352b7aa..32c9e9cb060 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql index f723284ad61..151ff275f7b 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql @@ -12,9 +12,9 @@ SELECT * FROM t_tuple_numeric FORMAT JSONEachRow; SELECT `t`.`1`.`2`, `t`.`1`.`3`, `t`.`4` FROM 
t_tuple_numeric; SELECT t.1.1, t.1.2, t.2 FROM t_tuple_numeric; -SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} -SELECT t.4 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} -SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER} +SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK, ARGUMENT_OUT_OF_BOUND} +SELECT t.4 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK, ARGUMENT_OUT_OF_BOUND} +SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER, ARGUMENT_OUT_OF_BOUND} DROP TABLE t_tuple_numeric; diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh index fbaec1ffaa7..b54e3d7f805 100755 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02336_sparse_columns_s3.sql b/tests/queries/0_stateless/02336_sparse_columns_s3.sql index bf4622adedc..1dc1e980846 100644 --- a/tests/queries/0_stateless/02336_sparse_columns_s3.sql +++ b/tests/queries/0_stateless/02336_sparse_columns_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage +-- Tags: no-parallel, no-fasttest, no-object-storage DROP TABLE IF EXISTS t_sparse_s3; diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index d73ac66763e..0f9dbd0247d 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage -- produces different pipeline if enabled set enable_memory_bound_merging_of_aggregation_results = 0; diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index a5a3da82324..105fb500461 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-s3-storage +-- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index e150d70b896..98c9cf9b7b4 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-merge-tree-settings -# Tag no-s3-storage: s3 does not have fsync +# Tags: no-object-storage, no-random-merge-tree-settings +# Tag no-object-storage: s3 does not have fsync CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference index 6276cd6d867..360c1aa9899 100644 --- 
a/tests/queries/0_stateless/02366_kql_func_binary.reference +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -1,7 +1,4 @@ -- binary functions 4 7 -1 -1 -1 7 3 1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql index 824022b564c..687f3afb5ee 100644 --- a/tests/queries/0_stateless/02366_kql_func_binary.sql +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -1,8 +1,5 @@ set dialect='kusto'; print ' -- binary functions'; print binary_and(4,7), binary_or(4,7); -print binary_shift_left(1, 1) == binary_shift_left(1, 65); -print binary_shift_right(2, 1) == binary_shift_right(2, 65); -print binary_shift_right(binary_shift_left(1, 65), 65) == 1; print binary_xor(2, 5), bitset_count_ones(42); print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02381_client_prints_server_side_time.sh b/tests/queries/0_stateless/02381_client_prints_server_side_time.sh index e6cd63da95d..81376ee3791 100755 --- a/tests/queries/0_stateless/02381_client_prints_server_side_time.sh +++ b/tests/queries/0_stateless/02381_client_prints_server_side_time.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-s3-storage +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql index 6cb1c0774aa..a2d46cf6d1b 100644 --- a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql +++ b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-replicated-database +-- Tags: no-object-storage, no-replicated-database DROP TABLE IF EXISTS test; diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index c111ed40a29..dfd2f12b55b 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-tsan, no-asan, no-debug, no-s3-storage, no-fasttest, no-replicated-database +# Tags: long, no-parallel, no-tsan, no-asan, no-debug, no-object-storage, no-fasttest, no-replicated-database set -e diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index 4f3fd0e54f6..5aeab4c746e 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +# Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql index 55723360c38..b4d31e234d8 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage +-- Tags: long, no-object-storage SET 
merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 4afc6d4496b..71f42e2a6db 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-debug -# - no-s3-storage - S3 has additional logging +# Tags: no-object-storage, no-debug +# - no-object-storage - S3 has additional logging # - no-debug - debug builds also has additional logging CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql index 0891f1aa8a2..f926b9037d2 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_grouping_sets_predicate; diff --git a/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql b/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql index 785fb10f70b..361305bac6d 100644 --- a/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql +++ b/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage drop table if exists tvm; create table tvm (c0 UInt64, c1 UInt64, c2 UInt64, c3 UInt64, c4 UInt64, c5 UInt64, c6 UInt64, c7 UInt64, c8 UInt64, c9 UInt64, c10 UInt64, c11 UInt64, c12 UInt64, c13 UInt64, c14 UInt64, c15 UInt64, c16 UInt64, c17 UInt64, c18 UInt64, c19 UInt64, c20 UInt64, c21 UInt64, c22 UInt64, c23 UInt64, c24 UInt64, c25 UInt64, c26 UInt64, c27 UInt64, c28 UInt64, c29 UInt64, c30 UInt64, c31 UInt64, c32 UInt64, c33 UInt64, c34 UInt64, c35 UInt64, c36 UInt64, c37 UInt64, c38 UInt64, c39 UInt64, c40 UInt64, c41 UInt64, c42 UInt64, c43 UInt64, c44 UInt64, c45 UInt64, c46 UInt64, c47 UInt64, c48 UInt64, c49 UInt64, c50 UInt64, c51 UInt64, c52 UInt64, c53 UInt64, c54 UInt64, c55 UInt64, c56 UInt64, c57 UInt64, c58 UInt64, c59 UInt64, c60 UInt64, c61 UInt64, c62 UInt64, c63 UInt64, c64 UInt64, c65 UInt64, c66 UInt64, c67 UInt64, c68 UInt64, c69 UInt64, c70 UInt64, c71 UInt64, c72 UInt64, c73 UInt64, c74 UInt64, c75 UInt64, c76 UInt64, c77 UInt64, c78 UInt64, c79 UInt64, c80 UInt64, c81 UInt64, c82 UInt64, c83 UInt64, c84 UInt64, c85 UInt64, c86 UInt64, c87 UInt64, c88 UInt64, c89 UInt64, c90 UInt64, c91 UInt64, c92 UInt64, c93 UInt64, c94 UInt64, c95 UInt64, c96 UInt64, c97 UInt64, c98 UInt64, c99 UInt64, c100 UInt64, c101 UInt64, c102 UInt64, c103 UInt64, c104 UInt64, c105 UInt64, c106 UInt64, c107 UInt64, c108 UInt64, c109 UInt64, c110 UInt64, c111 UInt64, c112 UInt64, c113 UInt64, c114 UInt64, c115 UInt64, c116 UInt64, c117 UInt64, c118 UInt64, c119 UInt64, c120 UInt64, c121 UInt64, c122 UInt64, c123 UInt64, c124 UInt64, c125 UInt64, c126 UInt64, c127 UInt64, c128 UInt64, c129 UInt64, c130 UInt64, c131 UInt64, c132 UInt64, c133 UInt64, c134 UInt64, c135 UInt64, c136 UInt64, c137 UInt64, c138 UInt64, c139 UInt64, c140 UInt64, c141 UInt64, c142 UInt64, c143 UInt64, c144 UInt64, c145 UInt64, c146 UInt64, c147 UInt64, c148 UInt64, c149 UInt64, c150 UInt64, c151 UInt64, c152 UInt64, c153 UInt64, c154 UInt64, c155 UInt64, c156 UInt64, c157 
UInt64, c158 UInt64, c159 UInt64, c160 UInt64, c161 UInt64, c162 UInt64, c163 UInt64, c164 UInt64, c165 UInt64, c166 UInt64, c167 UInt64, c168 UInt64, c169 UInt64, c170 UInt64, c171 UInt64, c172 UInt64, c173 UInt64, c174 UInt64, c175 UInt64, c176 UInt64, c177 UInt64, c178 UInt64, c179 UInt64, c180 UInt64, c181 UInt64, c182 UInt64, c183 UInt64, c184 UInt64, c185 UInt64, c186 UInt64, c187 UInt64, c188 UInt64, c189 UInt64, c190 UInt64, c191 UInt64, c192 UInt64, c193 UInt64, c194 UInt64, c195 UInt64, c196 UInt64, c197 UInt64, c198 UInt64, c199 UInt64, c200 UInt64, c201 UInt64, c202 UInt64, c203 UInt64, c204 UInt64, c205 UInt64, c206 UInt64, c207 UInt64, c208 UInt64, c209 UInt64, c210 UInt64, c211 UInt64, c212 UInt64, c213 UInt64, c214 UInt64, c215 UInt64, c216 UInt64, c217 UInt64, c218 UInt64, c219 UInt64, c220 UInt64, c221 UInt64, c222 UInt64, c223 UInt64, c224 UInt64, c225 UInt64, c226 UInt64, c227 UInt64, c228 UInt64, c229 UInt64, c230 UInt64, c231 UInt64, c232 UInt64, c233 UInt64, c234 UInt64, c235 UInt64, c236 UInt64, c237 UInt64, c238 UInt64, c239 UInt64, c240 UInt64, c241 UInt64, c242 UInt64, c243 UInt64, c244 UInt64, c245 UInt64, c246 UInt64, c247 UInt64, c248 UInt64, c249 UInt64, c250 UInt64, c251 UInt64, c252 UInt64, c253 UInt64, c254 UInt64, c255 UInt64, c256 UInt64, c257 UInt64, c258 UInt64, c259 UInt64, c260 UInt64, c261 UInt64, c262 UInt64, c263 UInt64, c264 UInt64, c265 UInt64, c266 UInt64, c267 UInt64, c268 UInt64, c269 UInt64, c270 UInt64, c271 UInt64, c272 UInt64, c273 UInt64, c274 UInt64, c275 UInt64, c276 UInt64, c277 UInt64, c278 UInt64, c279 UInt64, c280 UInt64, c281 UInt64, c282 UInt64, c283 UInt64, c284 UInt64, c285 UInt64, c286 UInt64, c287 UInt64, c288 UInt64, c289 UInt64, c290 UInt64, c291 UInt64, c292 UInt64, c293 UInt64, c294 UInt64, c295 UInt64, c296 UInt64, c297 UInt64, c298 UInt64, c299 UInt64) engine = MergeTree order by tuple() settings min_rows_for_wide_part = 10, min_bytes_for_wide_part=0, vertical_merge_algorithm_min_rows_to_activate=1; diff --git a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql index cb6b1b6083e..406cab82183 100644 --- a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql +++ b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh index c78cd202f1b..6f43c1ae869 100755 --- a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings +# Tags: no-object-storage, no-random-settings, no-random-merge-tree-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh index 31cf6e9606e..4f6a300c5b3 100755 --- a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage +# Tags: no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck 
source=../shell_config.sh diff --git a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh index 748bf856deb..8cb03a93a7a 100755 --- a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings +# Tags: no-object-storage, no-random-settings, no-random-merge-tree-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02725_memory-for-merges.sql b/tests/queries/0_stateless/02725_memory-for-merges.sql index 1a8402dff4b..8e4d4f5b3e0 100644 --- a/tests/queries/0_stateless/02725_memory-for-merges.sql +++ b/tests/queries/0_stateless/02725_memory-for-merges.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings -- We allocate a lot of memory for buffers when reading or writing to S3 DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh index eef52002e36..78659b70129 100755 --- a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql index 6b2961f0555..91e8624057c 100644 --- a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql +++ b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql @@ -10,7 +10,7 @@ DROP TABLE IF EXISTS t1; CREATE TABLE t0 (vkey UInt32, pkey UInt32, c0 UInt32) engine = TinyLog; CREATE TABLE t1 (vkey UInt32) ENGINE = AggregatingMergeTree ORDER BY vkey; INSERT INTO t0 VALUES (15, 25000, 58); -SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); +SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); -- { serverError ARGUMENT_OUT_OF_BOUND } DROP TABLE t0; DROP TABLE t1; diff --git a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 index eee236ff681..1ca5cc0bb7e 100644 --- a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 +++ b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 @@ -1,5 +1,5 @@ --- Tags: no-s3-storage --- Tag: no-s3-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) +-- Tags: no-object-storage +-- Tag: no-object-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) set mutations_sync=1; diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh index b44f9e50513..8a4a2e906b0 100755 --- 
a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh index a3b0d17f1be..87fbffdb1e6 100755 --- a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh @@ -1,10 +1,12 @@ #!/usr/bin/env bash +# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh SESSION_ID_PREFIX="02832_alter_max_sessions_session_$$" +QUERY_ID_PREFIX="02832_alter_max_sessions_query_$$" PROFILE="02832_alter_max_sessions_profile_$$" USER="02832_alter_max_sessions_user_$$" USER2="02832_alter_max_sessions_user_two_$$" @@ -15,6 +17,26 @@ ${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE SETTINGS PROFILE ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE USER '${USER}' SETTINGS PROFILE '${PROFILE}'" +function run_sessions_set() +{ + local sessions_count="$1" + local session_check="$2" + for ((i = 1 ; i <= ${sessions_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + local query_id="${QUERY_ID_PREFIX}_${i}" + # Write only expected error text + # More than alter_sessions_count queries will not start. + ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&query_id=${query_id}&session_id=${session_id}&session_check=${session_check}&session_timeout=600&function_sleep_max_microseconds_per_block=120000000" --data-binary "SELECT sleep(120)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & + done + + for ((i = 1 ; i <= ${sessions_count} ; i++)); do + local query_id="${QUERY_ID_PREFIX}_${i}" + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id='$query_id' SYNC" >/dev/null + done + + wait +} + function test_alter_profile() { local max_session_count="$1" @@ -24,23 +46,13 @@ function test_alter_profile() ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${max_session_count}" # Create sessions with $max_session_count restriction - for ((i = 1 ; i <= ${max_session_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - # Skip output from this query - ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=0" --data-binary "SELECT 1" > /dev/null - done + run_sessions_set $max_session_count 0 # Update restriction to $alter_sessions_count ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${alter_sessions_count}" # Simultaneous sessions should use max settings from profile ($alter_sessions_count) - for ((i = 1 ; i <= ${max_session_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - # ignore select 1, we need only errors - ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=1" --data-binary "select sleep(0.3)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & - done - - wait + run_sessions_set $max_session_count 1 } test_alter_profile 1 1 diff --git a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql index 3a751294cba..da2f050cf38 100644 --- 
a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql +++ b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-s3-storage +-- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-object-storage drop table if exists t_multi_prewhere; drop row policy if exists policy_02834 on t_multi_prewhere; diff --git a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh index edfed206d87..07d2ee27d22 100755 --- a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh +++ b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage +# Tags: zookeeper, no-object-storage # Because REPLACE PARTITION does not force immediate removal of replaced data parts from the local filesystem # (it tries to do it as quickly as possible, but it is still performed in a separate thread asynchronously) diff --git a/tests/queries/0_stateless/02864_statistics_ddl.reference b/tests/queries/0_stateless/02864_statistics_ddl.reference new file mode 100644 index 00000000000..a7ff5caa0b0 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_ddl.reference @@ -0,0 +1,31 @@ +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +10 +0 +After drop statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) +10 +CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After add statistic +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After materialize statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After rename + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) +20 diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql new file mode 100644 index 00000000000..fe612efe2ac --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -0,0 +1,59 @@ +-- Tests that various DDL statements create/drop/materialize statistics + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +CREATE TABLE tab +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; + +SHOW CREATE TABLE tab; + +INSERT INTO tab select number, -number,
generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; +SELECT count(*) FROM tab WHERE b < NULL and a < '10'; + +ALTER TABLE tab DROP STATISTICS a, b; + +SELECT 'After drop statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; + +SELECT 'After add statistic'; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After materialize statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +OPTIMIZE TABLE tab FINAL; + +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +ALTER TABLE tab RENAME COLUMN b TO c; +SHOW CREATE TABLE tab; + +SELECT 'After rename'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE c < 10 and a < 10; + +DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql index c531d39cd69..289ffee6600 100644 --- a/tests/queries/0_stateless/02864_statistics_exception.sql +++ b/tests/queries/0_stateless/02864_statistics_exception.sql @@ -1,57 +1,55 @@ -DROP TABLE IF EXISTS t1; +-- Tests that creating/dropping/materializing statistics produces the right exceptions.
-CREATE TABLE t1 +DROP TABLE IF EXISTS tab; + +-- Can't create statistics when allow_experimental_statistics = 0 +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a Float64 STATISTICS(tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } SET allow_experimental_statistics = 1; -CREATE TABLE t1 +-- The same type of statistics can't exist more than once on a column +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64, - pk String STATISTICS(tdigest), -) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTICS } + a Float64 STATISTICS(tdigest, tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } -CREATE TABLE t1 +-- Unknown statistics types are rejected +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest, tdigest(10)), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a Float64 STATISTICS(no_statistics_type) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } -CREATE TABLE t1 +-- tDigest statistics can only be created on numeric columns +CREATE TABLE tab ( - a Float64 STATISTICS(xyz), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a String STATISTICS(tdigest), +) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE t1 +CREATE TABLE tab ( a Float64, - b Int64, - pk String, -) Engine = MergeTree() ORDER BY pk; + b String +) Engine = MergeTree() ORDER BY tuple(); -ALTER TABLE t1 ADD STATISTICS a TYPE xyz; -- { serverError INCORRECT_QUERY } -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS IF NOT EXISTS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- Statistics can be created only on numeric columns -ALTER TABLE t1 MODIFY STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS pk TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 DROP STATISTICS a; -ALTER TABLE t1 DROP STATISTICS IF EXISTS a; -ALTER TABLE t1 CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 CLEAR STATISTICS IF EXISTS a; -ALTER TABLE t1 MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS a; +ALTER TABLE tab DROP STATISTICS IF EXISTS a; +ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS a; +ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS b TYPE tdigest; -ALTER TABLE t1 MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; -ALTER TABLE t1 MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab MODIFY
COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; +ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } -DROP TABLE t1; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/03164_materialize_statistics.reference b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference similarity index 100% rename from tests/queries/0_stateless/03164_materialize_statistics.reference rename to tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql new file mode 100644 index 00000000000..3e15ec1148e --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql @@ -0,0 +1,52 @@ +-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_analyzer = 1; +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +SET materialize_statistics_on_insert = 0; + +CREATE TABLE tab +( + a Int64 STATISTICS(tdigest), + b Int16 STATISTICS(tdigest), +) ENGINE = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; + +OPTIMIZE TABLE tab FINAL; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; + +TRUNCATE TABLE tab; +SET mutations_sync = 2; + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; + +DROP TABLE tab; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, message FROM system.text_log JOIN +( + SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log + WHERE current_database = currentDatabase() + AND query LIKE 'SELECT count(*) FROM tab%' + AND type = 'QueryFinish' +) AS query_log USING (query_id) +WHERE message LIKE '%moved to PREWHERE%' +ORDER BY event_time_microseconds; + +SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) +FROM system.query_log +WHERE current_database = currentDatabase() + AND query LIKE 'INSERT INTO tab SELECT%' + AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/02864_statistics_operate.reference b/tests/queries/0_stateless/02864_statistics_operate.reference deleted file mode 100644 index 6398a9bd000..00000000000 --- a/tests/queries/0_stateless/02864_statistics_operate.reference +++ /dev/null @@ -1,31 +0,0 @@ -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -10 -0 -After drop statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) -10 -CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 
-After add statistic -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After materialize statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After rename - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) -20 diff --git a/tests/queries/0_stateless/02864_statistics_operate.sql b/tests/queries/0_stateless/02864_statistics_operate.sql deleted file mode 100644 index bf69c11bc91..00000000000 --- a/tests/queries/0_stateless/02864_statistics_operate.sql +++ /dev/null @@ -1,57 +0,0 @@ -DROP TABLE IF EXISTS t1; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; - -CREATE TABLE t1 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE t1; - -INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; -SELECT count(*) FROM t1 WHERE b < NULL and a < '10'; - -ALTER TABLE t1 DROP STATISTICS a, b; - -SELECT 'After drop statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -SHOW CREATE TABLE t1; - -ALTER TABLE t1 ADD STATISTICS a, b TYPE tdigest; - -SELECT 'After add statistic'; - -SHOW CREATE TABLE t1; - -ALTER TABLE t1 MATERIALIZE STATISTICS a, b; -INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After materialize statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -OPTIMIZE TABLE t1 FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -ALTER TABLE t1 RENAME COLUMN b TO c; -SHOW CREATE TABLE t1; - -SELECT 'After rename'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE c < 10 and a < 10; - -DROP TABLE IF EXISTS t1; diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh index ddad7a1904b..76ada756f47 
100755 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage +# Tags: no-fasttest, no-parallel, no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 8a6904b6bd7..15f169d880f 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -9,8 +9,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" + # Non-replicated engines are allowed ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" + # Replicated storages are forbidden -${CLICKHOUSE_CLIENT} --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" + +# But it is allowed with the special setting +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 + ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh index 2e344a6b6e5..6f454da40da 100755 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh +++
b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.reference b/tests/queries/0_stateless/02967_prewhere_no_columns.reference new file mode 100644 index 00000000000..df105254618 --- /dev/null +++ b/tests/queries/0_stateless/02967_prewhere_no_columns.reference @@ -0,0 +1,2 @@ +105 +105 diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.sql b/tests/queries/0_stateless/02967_prewhere_no_columns.sql new file mode 100644 index 00000000000..efcc952caa2 --- /dev/null +++ b/tests/queries/0_stateless/02967_prewhere_no_columns.sql @@ -0,0 +1,51 @@ +CREATE TABLE t_02967 +( + `key` Date, + `value` UInt16 +) +ENGINE = MergeTree +ORDER BY key +SETTINGS + index_granularity_bytes = 0 --8192 --, min_index_granularity_bytes = 2 + , index_granularity = 100 + , min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 +-- +-- , min_bytes_for_wide_part = 2 +AS SELECT + number, + repeat(toString(number), 5) +FROM numbers(105.); + + + +-- Check with a newly inserted data part. Its in-memory structures are filled at insert time. +SELECT + count(ignore(*)) +FROM t_02967 +PREWHERE CAST(ignore() + 1 as UInt8) +GROUP BY + ignore(65535, *), + ignore(255, 256, *) +SETTINGS + --send_logs_level='test', + max_threads=1; + + + +-- Reload the part from disk to check that in-memory structures were properly serialized and deserialized +DETACH TABLE t_02967; +ATTACH TABLE t_02967; + + +SELECT + count(ignore(*)) +FROM t_02967 +PREWHERE CAST(ignore() + 1 as UInt8) +GROUP BY + ignore(65535, *), + ignore(255, 256, *) +SETTINGS + --send_logs_level='test', + max_threads=1; + +DROP TABLE t_02967; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference new file mode 100644 index 00000000000..3389ea44074 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -0,0 +1,5 @@ +SELECT + __table1.`arr.size0` AS `length(arr)`, + __table1.`n.null` AS `isNull(n)` +FROM default.t_column_names AS __table1 +{"length(arr)":"3","isNull(n)":0} diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql new file mode 100644 index 00000000000..48e5232d18b --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_column_names; + +CREATE TABLE t_column_names (arr Array(UInt64), n Nullable(String)) ENGINE = Memory; + +INSERT INTO t_column_names VALUES ([1, 2, 3], 'foo'); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), isNull(n) FROM t_column_names; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow; + +DROP TABLE t_column_names; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference new file mode 100644 index 00000000000..9488291c8ff --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -0,0 +1,12 @@
+SELECT __table1.`m.size0` AS `length(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +2 +1 +SELECT __table1.`m.size0` = 0 AS `empty(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +0 +0 +SELECT __table1.`m.size0` != 0 AS `notEmpty(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +1 +1 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql new file mode 100644 index 00000000000..e8a752a82d5 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns_map; + +CREATE TABLE t_func_to_subcolumns_map (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_map VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(m) FROM t_func_to_subcolumns_map; +SELECT length(m) FROM t_func_to_subcolumns_map; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT empty(m) FROM t_func_to_subcolumns_map; +SELECT empty(m) FROM t_func_to_subcolumns_map; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT notEmpty(m) FROM t_func_to_subcolumns_map; +SELECT notEmpty(m) FROM t_func_to_subcolumns_map; + +DROP TABLE t_func_to_subcolumns_map; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference new file mode 100644 index 00000000000..04616738a15 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference @@ -0,0 +1,4 @@ +SELECT __table1.`v.String` AS `variantElement(v, \'String\')` +FROM default.t_func_to_subcolumns_variant AS __table1 +foo +\N diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql new file mode 100644 index 00000000000..511bcc44514 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns_variant; + +SET allow_experimental_variant_type = 1; + +CREATE TABLE t_func_to_subcolumns_variant (id UInt64, v Variant(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_variant VALUES (1, 'foo') (2, 111); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; +SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; + +DROP TABLE t_func_to_subcolumns_variant; diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference index 72749c905a3..d00491fd7e5 100644 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference @@ -1 +1 @@ -1 1 1 +1 diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh index 263a4535c0e..84183606d48 100755 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh @@ -11,4 +11,4 @@ $CLICKHOUSE_CLIENT --query_id $query_id 
--log_query_threads 1 --query="select nu $CLICKHOUSE_CLIENT -q "system flush logs;" -$CLICKHOUSE_CLIENT -q "select count() > 0, (countIf(thread_name = 'AggregDestruct') as aggs) > 0, aggs > 1 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase();" +$CLICKHOUSE_CLIENT -q "select count() > 0 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase() and thread_name = 'AggregDestruct';" diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.reference b/tests/queries/0_stateless/02993_lazy_index_loading.reference index 5bc329ae4eb..08f07a92815 100644 --- a/tests/queries/0_stateless/02993_lazy_index_loading.reference +++ b/tests/queries/0_stateless/02993_lazy_index_loading.reference @@ -1,4 +1,4 @@ -100000000 140000000 +100000000 100000000 0 0 1 100000000 100000000 diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference new file mode 100644 index 00000000000..3051c199363 --- /dev/null +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference @@ -0,0 +1,25 @@ +3 +2 +SELECT __table1.`arr.size0` AS `length(arr)` +FROM default.t_length_1 AS __table1 +WHERE __table1.`arr.size0` IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 +) +2 +SELECT __table1.`arr.size0` AS `length(arr)` +FROM default.t_length_1 AS __table1 +WHERE __table1.`arr.size0` IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 + FINAL +) +2 +SELECT length(__table1.arr) AS `length(arr)` +FROM default.t_length_1 AS __table1 +FINAL +WHERE length(__table1.arr) IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 + FINAL +) diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql new file mode 100644 index 00000000000..3fe29139c5f --- /dev/null +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS t_length_1; +DROP TABLE IF EXISTS t_length_2; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; +SET optimize_on_insert = 0; + +CREATE TABLE t_length_1 (id UInt64, arr Array(UInt64)) ENGINE = ReplacingMergeTree ORDER BY id; +CREATE TABLE t_length_2 (id UInt64, arr_length UInt64) ENGINE = ReplacingMergeTree ORDER BY id; + +INSERT INTO t_length_1 VALUES (1, [1, 2, 3]), (2, [4, 5]); +INSERT INTO t_length_2 VALUES (1, 3), (1, 2), (2, 2); + +SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2); + +SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); + +SELECT length(arr) FROM t_length_1 FINAL WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 FINAL WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); + +DROP TABLE t_length_1; +DROP TABLE t_length_2; diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh index 
5fac964a219..d51e180efc9 100755 --- a/tests/queries/0_stateless/03008_local_plain_rewritable.sh +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-random-settings, no-s3-storage, no-replicated-database, no-shared-merge-tree +# Tags: no-random-settings, no-object-storage, no-replicated-database, no-shared-merge-tree # Tag no-random-settings: enable after root causing flakiness CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference new file mode 100644 index 00000000000..202e4557a33 --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -0,0 +1,2 @@ +query +String diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql new file mode 100644 index 00000000000..b26096f7f0e --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -0,0 +1,18 @@ + +SELECT * FROM fuzzQuery('SELECT 1', 500, 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; + +SELECT * FROM fuzzQuery('SELECT * +FROM ( + SELECT + ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, + count() + FROM numbers(3) + GROUP BY item_id WITH TOTALS +) AS l FULL JOIN ( + SELECT + ([toString((number % 2) * 2)] :: Array(String)) AS item_id + FROM numbers(3) +) AS r +ON l.item_id = r.item_id +ORDER BY 1,2,3; +', 500, 8956) LIMIT 10 FORMAT NULL; diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh index 526c4f84030..09bdd7f6b56 100755 --- a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.reference b/tests/queries/0_stateless/03036_reading_s3_archives.reference index 36ced212a1b..eacf16d0295 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.reference +++ b/tests/queries/0_stateless/03036_reading_s3_archives.reference @@ -1,52 +1,52 @@ -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv 
test/03036_archive2.zip::example3.csv -1 Str1 example1.csv test/03036_archive1.tar::example1.csv -2 Str2 example1.csv test/03036_archive1.tar::example1.csv -7 Str7 example4.csv test/03036_archive1.tar::example4.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive1.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -11 Str11 example6.csv test/03036_archive3.tar.gz::example6.csv -12 Str12 example6.csv test/03036_archive3.tar.gz::example6.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -13 Str13 example7.csv test/03036_compressed_file_archive.zip::example7.csv -14 Str14 example7.csv test/03036_compressed_file_archive.zip::example7.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +1 Str1 25 example1.csv test/03036_archive1.tar::example1.csv +2 Str2 25 example1.csv test/03036_archive1.tar::example1.csv +7 Str7 25 example4.csv test/03036_archive1.tar::example4.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive1.tar::example4.csv +8 Str8 25 
example4.csv test/03036_archive2.tar::example4.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +11 Str11 29 example6.csv test/03036_archive3.tar.gz::example6.csv +12 Str12 29 example6.csv test/03036_archive3.tar.gz::example6.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +13 Str13 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv +14 Str14 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.sql b/tests/queries/0_stateless/03036_reading_s3_archives.sql index 00d7cc25e1a..43bda4ee704 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.sql +++ b/tests/queries/0_stateless/03036_reading_s3_archives.sql @@ -1,22 +1,22 @@ -- Tags: no-fasttest -- Tag no-fasttest: Depends on AWS -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } -select id, data, _file, _path from s3(s3_conn, 
filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } CREATE TABLE table_zip22 Engine S3(s3_conn, filename='03036_archive2.zip :: example2.csv'); -select id, data, _file, _path from table_zip22 ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from table_zip22 ORDER BY (id, _file, _path); CREATE table table_tar2star Engine S3(s3_conn, filename='03036_archive2.tar :: example*.csv'); -SELECT id, data, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); CREATE table table_tarstarglobs Engine S3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv'); -SELECT id, data, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError UNKNOWN_STORAGE } -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) diff --git a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference index 3ac6127fb21..2d33f7f6683 100644 --- a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference +++ b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference @@ -1,8 +1,8 @@ -100000000 140000000 -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 +100000000 100000000 0 0 -100000000 140000000 +100000000 100000000 0 0 0 0 1 diff --git 
a/tests/queries/0_stateless/03128_system_unload_primary_key.reference b/tests/queries/0_stateless/03128_system_unload_primary_key.reference index c7b40ae5b06..2646dc7247f 100644 --- a/tests/queries/0_stateless/03128_system_unload_primary_key.reference +++ b/tests/queries/0_stateless/03128_system_unload_primary_key.reference @@ -1,4 +1,4 @@ -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 0 0 0 0 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index e69de29bb2d..c5a6cbab0bc 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -0,0 +1,2 @@ +1231 John 33 +8888 Alice 50 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index cd29fae8fd7..b189388e356 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -5,7 +5,8 @@ CREATE TABLE users ( uid Int16, name String, age Int16, - projection p1 (select count(), age group by age) + projection p1 (select count(), age group by age), + projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid; INSERT INTO users VALUES (1231, 'John', 33); @@ -13,3 +14,18 @@ INSERT INTO users VALUES (6666, 'Ksenia', 48); INSERT INTO users VALUES (8888, 'Alice', 50); DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; + +-- expecting no projection +SELECT + name, + `table` +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users'); + +SELECT * FROM users ORDER BY uid; + +DROP TABLE users; diff --git a/tests/queries/0_stateless/03164_materialize_statistics.sql b/tests/queries/0_stateless/03164_materialize_statistics.sql deleted file mode 100644 index 43c5724dd59..00000000000 --- a/tests/queries/0_stateless/03164_materialize_statistics.sql +++ /dev/null @@ -1,49 +0,0 @@ -DROP TABLE IF EXISTS t_statistics_materialize; - -SET allow_experimental_analyzer = 1; -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET materialize_statistics_on_insert = 0; - -CREATE TABLE t_statistics_materialize -( - a Int64 STATISTICS(tdigest), - b Int16 STATISTICS(tdigest), -) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. 
- -INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; - -OPTIMIZE TABLE t_statistics_materialize FINAL; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; - -TRUNCATE TABLE t_statistics_materialize; -SET mutations_sync = 2; - -INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE t_statistics_materialize MATERIALIZE STATISTICS a, b; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; - -DROP TABLE t_statistics_materialize; - -SYSTEM FLUSH LOGS; - -SELECT log_comment, message FROM system.text_log JOIN -( - SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log - WHERE current_database = currentDatabase() - AND query LIKE 'SELECT count(*) FROM t_statistics_materialize%' - AND type = 'QueryFinish' -) AS query_log USING (query_id) -WHERE message LIKE '%moved to PREWHERE%' -ORDER BY event_time_microseconds; - -SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) -FROM system.query_log -WHERE current_database = currentDatabase() - AND query LIKE 'INSERT INTO t_statistics_materialize SELECT%' - AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.reference b/tests/queries/0_stateless/03165_round_scale_as_column.reference index 9ad25ed466a..e0c9b6959ee 100644 --- a/tests/queries/0_stateless/03165_round_scale_as_column.reference +++ b/tests/queries/0_stateless/03165_round_scale_as_column.reference @@ -2162,4 +2162,17 @@ CHECKPOINT2 10 1.6275 1.6275 1.6275 1.6275 1 1 +3 +3.1 +3.14 +3.142 +3.1416 +3.14159 +3.141593 +3.1415927 +3.14159265 +3.141592654 +42 +42.4 +42.42 1 diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.sql b/tests/queries/0_stateless/03165_round_scale_as_column.sql index 229f705808d..adae36564b8 100644 --- a/tests/queries/0_stateless/03165_round_scale_as_column.sql +++ b/tests/queries/0_stateless/03165_round_scale_as_column.sql @@ -118,6 +118,7 @@ DROP TABLE tab; SELECT round(1, 1); SELECT round(materialize(1), materialize(1)); -SELECT round(1, materialize(1)); --{serverError ILLEGAL_COLUMN} +SELECT round(pi(), number) FROM numbers(10); +SELECT round(toDecimal32(42.42, 2), number) from numbers(3); SELECT round(materialize(1), 1); SELECT materialize(10.1) AS x, ceil(x, toUInt256(123)); --{serverError ILLEGAL_TYPE_OF_ARGUMENT} diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference new file mode 100644 index 00000000000..1fc6683620c --- /dev/null +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference @@ -0,0 +1,9 @@ +1 +2 1 +3 0 +0 450 +1 460 +2 470 +3 480 +4 490 +\N 4950 diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql new file mode 100644 index 00000000000..f10019a78dd --- /dev/null +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql @@ -0,0 +1,49 @@ +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS t_func_to_subcolumns_map_2; + +CREATE TABLE t_func_to_subcolumns_map_2 (id UInt64, m Map(String, UInt64)) ENGINE = 
MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_map_2 VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); + +SELECT sum(mapContains(m, toNullable('aaa'))) FROM t_func_to_subcolumns_map_2; + +DROP TABLE t_func_to_subcolumns_map_2; + +DROP TABLE IF EXISTS t_func_to_subcolumns_join; + +CREATE TABLE t_func_to_subcolumns_join (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns_join VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); + +SET join_use_nulls = 1; + +SELECT + id, + right.n IS NULL +FROM t_func_to_subcolumns_join AS left +FULL OUTER JOIN +( + SELECT + 1 AS id, + 'qqq' AS n + UNION ALL + SELECT + 3 AS id, + 'www' +) AS right USING (id) +WHERE empty(arr); + +DROP TABLE t_func_to_subcolumns_join; + +DROP TABLE IF EXISTS t_func_to_subcolumns_use_nulls; + +CREATE TABLE t_func_to_subcolumns_use_nulls (arr Array(UInt64), v UInt64) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns_use_nulls SELECT range(number % 10), number FROM numbers(100); + +SELECT length(arr) AS n, sum(v) FROM t_func_to_subcolumns_use_nulls GROUP BY n WITH ROLLUP HAVING n <= 4 OR isNull(n) ORDER BY n SETTINGS group_by_use_nulls = 1; + +DROP TABLE t_func_to_subcolumns_use_nulls; diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh index 8d74ebe1039..22a2fd82c64 100755 --- a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh @@ -1,9 +1,14 @@ #!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: this test relies on timeouts; it always takes no less than 4 seconds to run CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# system.error_log is created lazily; the flush logs query makes sure that the table is created.
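+# Editor's note (a hedged sketch, not part of the original patch): the test below
+# follows a "flush, snapshot counters, trigger errors, flush again, assert deltas"
+# pattern. A minimal standalone version of the same idea, assuming a running server
+# and the usual $CLICKHOUSE_CLIENT helper from shell_config.sh, might look like:
+#   $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS;"
+#   before=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 60")
+#   $CLICKHOUSE_CLIENT -q "SELECT * FROM no_such_table_03172;" 2>/dev/null  # UNKNOWN_TABLE, code 60
+#   $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS;"
+#   $CLICKHOUSE_CLIENT -q "SELECT sum(value) > ${before:-0} FROM system.error_log WHERE code = 60;"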
+$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS;" + # Get the previous number of errors for 111, 222 and 333 errors_111=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 111") errors_222=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 222") @@ -38,4 +43,4 @@ $CLICKHOUSE_CLIENT -mn -q " SELECT sum(value) > $(($errors_111+1)) FROM system.error_log WHERE code = 111; SELECT sum(value) > $(($errors_222+1)) FROM system.error_log WHERE code = 222; SELECT sum(value) > $(($errors_333+1)) FROM system.error_log WHERE code = 333; -" \ No newline at end of file +" diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql new file mode 100644 index 00000000000..5aa3e4c2e0c --- /dev/null +++ b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, FULL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, FULL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANY, FULL, a); -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference new file mode 100644 index 00000000000..33b8cd6ee26 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference @@ -0,0 +1,3 @@ +-- bitShiftRight +-- bitShiftLeft +OK diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql new file mode 100644 index 00000000000..aec01753673 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql @@ -0,0 +1,17 @@ +SELECT '-- bitShiftRight'; +SELECT bitShiftRight(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT '-- bitShiftLeft'; +SELECT bitShiftLeft(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', 4 * 8 + 1); -- { serverError 
ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT 'OK'; \ No newline at end of file diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference new file mode 100644 index 00000000000..fcd78da1283 --- /dev/null +++ b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference @@ -0,0 +1,2 @@ +1000000 +1000000 diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql new file mode 100644 index 00000000000..25798ef6d33 --- /dev/null +++ b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS data_02051__fuzz_24; + +CREATE TABLE data_02051__fuzz_24 (`key` Int16, `value` String) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part=0 AS SELECT number, repeat(toString(number), 5) FROM numbers(1000000.); + +SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); + +detach table data_02051__fuzz_24; +attach table data_02051__fuzz_24; + +SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); + +DROP TABLE data_02051__fuzz_24; diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference new file mode 100644 index 00000000000..6d2c1334d6e --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference @@ -0,0 +1,10 @@ +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql new file mode 100644 index 00000000000..a01a595dbb5 --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql @@ -0,0 +1,7 @@ +set allow_experimental_dynamic_type=1; +create table test (d Dynamic) engine=Memory; +insert into table test select * from numbers(5); +alter table test modify column d Dynamic(max_types=1); +select d.UInt64 from test settings allow_experimental_analyzer=1; +select d.UInt64 from test settings allow_experimental_analyzer=0; + diff --git a/tests/queries/1_stateful/00166_explain_estimate.reference b/tests/queries/1_stateful/00166_explain_estimate.reference index 71ddd681581..85ecd0b9a71 100644 --- a/tests/queries/1_stateful/00166_explain_estimate.reference +++ b/tests/queries/1_stateful/00166_explain_estimate.reference @@ -1,5 +1,5 @@ test hits 1 57344 7 -test hits 1 8839168 1079 -test hits 1 835584 102 +test hits 1 8832938 1079 +test hits 1 829354 102 test hits 1 8003584 977 test hits 2 581632 71 diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 68734ef5ec8..fa2bfef935a 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1658,6 +1658,7 @@ fuzzBits fuzzJSON fuzzer fuzzers +fuzzQuery gRPC gccMurmurHash gcem diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 5ae4c7a0b1c..587e015b340 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1311,9 +1311,9 @@ 
void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa while (!children_span.empty()) { Coordination::Requests ops; - for (size_t i = 0; i < 1000 && !children.empty(); ++i) + for (size_t i = 0; i < 1000 && !children_span.empty(); ++i) { - removeRecursive(zookeeper, fs::path(path) / children.back()); + removeRecursive(zookeeper, fs::path(path) / children_span.back()); ops.emplace_back(zkutil::makeRemoveRequest(fs::path(path) / children_span.back(), -1)); children_span = children_span.subspan(0, children_span.size() - 1); } diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml index e800e923482..c3a62a01eac 100644 --- a/utils/keeper-bench/example.yaml +++ b/utils/keeper-bench/example.yaml @@ -18,45 +18,46 @@ connections: host: "localhost:9181" -generator: - setup: +setup: + node: + name: "test3" + node: + name: "test_create" + node: + name: "test4" + node: + name: "test" + data: "somedata" node: - name: "test3" + repeat: 4 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 node: - name: "test_create" - node: - name: "test4" - node: - name: "test" - data: "somedata" - node: - repeat: 4 - name: - random_string: - size: 15 - data: - random_string: - size: - min_value: 10 - max_value: 20 + repeat: 2 node: repeat: 2 - node: - repeat: 2 - name: - random_string: - size: 12 name: random_string: - size: 15 - data: - random_string: - size: - min_value: 10 - max_value: 20 - node: - name: "test2" - data: "somedata" + size: 12 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + name: "test2" + data: "somedata" + +generator: requests: create: path: "/test_create"
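
Editor's note: a minimal, hedged illustration of the rewrite that the new 02971/03003/03171 tests above exercise (a sketch under assumed default settings; the table name t_subcolumns_demo is invented for this example and is not part of the patch). With the analyzer and optimize_functions_to_subcolumns enabled, functions such as length(arr), empty(arr), and notEmpty(arr) are rewritten to read the lightweight arr.size0 subcolumn instead of the full array:

SET allow_experimental_analyzer = 1;
SET optimize_functions_to_subcolumns = 1;

CREATE TABLE t_subcolumns_demo (id UInt64, arr Array(UInt64))
ENGINE = MergeTree ORDER BY id;

INSERT INTO t_subcolumns_demo VALUES (1, [1, 2, 3]), (2, []);

-- The dumped AST should reference __table1.`arr.size0` rather than length(arr),
-- mirroring the reference output of the tests above.
EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1
SELECT length(arr) FROM t_subcolumns_demo;

SELECT length(arr) FROM t_subcolumns_demo; -- returns 3 and 0

DROP TABLE t_subcolumns_demo;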