diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 00000000000..9b75efc791d --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,17 @@ +codecov: + max_report_age: off + strict_yaml_branch: "master" + +ignore: + - "contrib" + - "docs" + - "benchmark" + - "tests" + - "docker" + - "debian" + - "cmake" + +comment: false + +github_checks: + annotations: false \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index eb21c4bfd00..865a876b276 100644 --- a/.gitmodules +++ b/.gitmodules @@ -186,3 +186,4 @@ [submodule "contrib/cyrus-sasl"] path = contrib/cyrus-sasl url = https://github.com/cyrusimap/cyrus-sasl + branch = cyrus-sasl-2.1 diff --git a/CMakeLists.txt b/CMakeLists.txt index be1b6ac04f5..f4e230fbd93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,7 +173,7 @@ endif () # Make sure the final executable has symbols exported set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") -find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy") +find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy") if (OBJCOPY_PATH) message(STATUS "Using objcopy: ${OBJCOPY_PATH}.") @@ -313,7 +313,7 @@ if (COMPILER_CLANG) endif () # Always prefer llvm tools when using clang. For instance, we cannot use GNU ar when llvm LTO is enabled - find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8") + find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8") if (LLVM_AR_PATH) message(STATUS "Using llvm-ar: ${LLVM_AR_PATH}.") @@ -322,7 +322,7 @@ if (COMPILER_CLANG) message(WARNING "Cannot find llvm-ar. System ar will be used instead. It does not work with ThinLTO.") endif () - find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8") + find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8") if (LLVM_RANLIB_PATH) message(STATUS "Using llvm-ranlib: ${LLVM_RANLIB_PATH}.") diff --git a/README.md b/README.md index 6b909dd710c..5753118eb4d 100644 --- a/README.md +++ b/README.md @@ -17,5 +17,4 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events -* [ClickHouse for Edge Analytics](https://ones2020.sched.com/event/bWPs) on September 29, 2020. * [ClickHouse online meetup (in Russian)](https://clck.ru/R2zB9) on October 1, 2020. 
diff --git a/SECURITY.md b/SECURITY.md index 67d8dc85c13..6d64dd2d24b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -10,11 +10,14 @@ currently being supported with security updates: | 1.x | :x: | | 18.x | :x: | | 19.x | :x: | -| 19.14 | :white_check_mark: | | 20.1 | :x: | | 20.3 | :white_check_mark: | -| 20.4 | :white_check_mark: | -| 20.5 | :white_check_mark: | +| 20.4 | :x: | +| 20.5 | :x: | +| 20.6 | :x: | +| 20.7 | :white_check_mark: | +| 20.8 | :white_check_mark: | +| 20.9 | :white_check_mark: | ## Reporting a Vulnerability diff --git a/base/common/LineReader.cpp b/base/common/LineReader.cpp index dd2e09b0393..b2bc929a1df 100644 --- a/base/common/LineReader.cpp +++ b/base/common/LineReader.cpp @@ -38,10 +38,10 @@ bool hasInputData() } -LineReader::Suggest::WordsRange LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) const +std::optional LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) const { if (!ready) - return std::make_pair(words.end(), words.end()); + return std::nullopt; std::string_view last_word; diff --git a/base/common/LineReader.h b/base/common/LineReader.h index 77dc70d8808..159a93ce40d 100644 --- a/base/common/LineReader.h +++ b/base/common/LineReader.h @@ -4,6 +4,7 @@ #include #include +#include class LineReader { @@ -18,7 +19,7 @@ public: std::atomic ready{false}; /// Get iterators for the matched range of words if any. - WordsRange getCompletions(const String & prefix, size_t prefix_length) const; + std::optional getCompletions(const String & prefix, size_t prefix_length) const; }; using Patterns = std::vector; diff --git a/base/common/ReadlineLineReader.cpp b/base/common/ReadlineLineReader.cpp index 095df319acc..4e67ed5e4a3 100644 --- a/base/common/ReadlineLineReader.cpp +++ b/base/common/ReadlineLineReader.cpp @@ -30,7 +30,8 @@ static LineReader::Suggest::Words::const_iterator end; static void findRange(const char * prefix, size_t prefix_length) { std::string prefix_str(prefix); - std::tie(pos, end) = suggest->getCompletions(prefix_str, prefix_length); + if (auto completions = suggest->getCompletions(prefix_str, prefix_length)) + std::tie(pos, end) = *completions; } /// Iterates through matched range. 
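Reviewer note on the `LineReader` hunks above: `Suggest::getCompletions` now returns `std::optional<WordsRange>` instead of an `(end, end)` iterator pair, so callers can distinguish "dictionary not loaded yet" from "loaded, but no matches". A minimal, self-contained sketch of the new caller pattern follows; the free function, sample words, and prefix matching below are illustrative stand-ins, not code from this patch.

```cpp
#include <algorithm>
#include <iostream>
#include <optional>
#include <string>
#include <utility>
#include <vector>

using Words = std::vector<std::string>;
using WordsRange = std::pair<Words::const_iterator, Words::const_iterator>;

/// Illustrative stand-in for LineReader::Suggest::getCompletions:
/// nullopt while the word list is still being loaded, otherwise the
/// (possibly empty) range of words sharing the prefix.
std::optional<WordsRange> getCompletions(const Words & words, bool ready, const std::string & prefix)
{
    if (!ready)
        return std::nullopt;

    /// 'words' must be sorted; compare only the first prefix.size() characters.
    return std::equal_range(words.begin(), words.end(), prefix,
        [len = prefix.size()](const std::string & a, const std::string & b)
        {
            return a.compare(0, len, b, 0, len) < 0;
        });
}

int main()
{
    Words words{"SELECT", "SHOW", "SYSTEM"};
    if (auto range = getCompletions(words, /*ready=*/ true, "S"))
        for (auto it = range->first; it != range->second; ++it)
            std::cout << *it << '\n';
    else
        std::cout << "completions not ready yet\n";
}
```

Both in-tree callers follow the same `if (auto range = ...)` shape: `ReadlineLineReader` above, and `ReplxxLineReader` in the next hunk.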
diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp index cc8b963958c..c4f9c65a116 100644 --- a/base/common/ReplxxLineReader.cpp +++ b/base/common/ReplxxLineReader.cpp @@ -70,8 +70,9 @@ ReplxxLineReader::ReplxxLineReader( auto callback = [&suggest] (const String & context, size_t context_size) { - auto range = suggest.getCompletions(context, context_size); - return Replxx::completions_t(range.first, range.second); + if (auto range = suggest.getCompletions(context, context_size)) + return Replxx::completions_t(range->first, range->second); + return Replxx::completions_t(); }; rx.set_completion_callback(callback); diff --git a/base/common/wide_integer_impl.h b/base/common/wide_integer_impl.h index 5673ac46c4a..ea4da63da20 100644 --- a/base/common/wide_integer_impl.h +++ b/base/common/wide_integer_impl.h @@ -358,7 +358,12 @@ private: } else { - if (number * sizeof(base_type) < sizeof(T)) + if constexpr (sizeof(T) <= sizeof(base_type)) + { + if (!number) + return x; + } + else if (number * sizeof(base_type) < sizeof(T)) return x >> (number * base_bits); // & std::numeric_limits::max() return 0; } @@ -366,26 +371,32 @@ private: template constexpr static integer - op_minus(const integer & lhs, T rhs) + minus(const integer & lhs, T rhs) { - integer res; + constexpr const unsigned rhs_items = (sizeof(T) > sizeof(base_type)) ? (sizeof(T) / sizeof(base_type)) : 1; + constexpr const unsigned op_items = (item_count < rhs_items) ? item_count : rhs_items; - bool is_underflow = false; - for (unsigned i = 0; i < item_count; ++i) + integer res(lhs); + bool underflows[item_count] = {}; + + for (unsigned i = 0; i < op_items; ++i) { - base_type lhs_item = lhs.items[little(i)]; base_type rhs_item = get_item(rhs, i); + base_type & res_item = res.items[little(i)]; - if (is_underflow) + underflows[i] = res_item < rhs_item; + res_item -= rhs_item; + } + + for (unsigned i = 1; i < item_count; ++i) + { + if (underflows[i-1]) { - is_underflow = (lhs_item == 0); - --lhs_item; + base_type & res_item = res.items[little(i)]; + if (res_item == 0) + underflows[i] = true; + --res_item; } - - if (lhs_item < rhs_item) - is_underflow = true; - - res.items[little(i)] = lhs_item - rhs_item; } return res; @@ -393,73 +404,126 @@ private: template constexpr static integer - op_plus(const integer & lhs, T rhs) + plus(const integer & lhs, T rhs) { - integer res; + constexpr const unsigned rhs_items = (sizeof(T) > sizeof(base_type)) ? (sizeof(T) / sizeof(base_type)) : 1; + constexpr const unsigned op_items = (item_count < rhs_items) ? 
item_count : rhs_items; - bool is_overflow = false; - for (unsigned i = 0; i < item_count; ++i) + integer res(lhs); + bool overflows[item_count] = {}; + + for (unsigned i = 0; i < op_items; ++i) { - base_type lhs_item = lhs.items[little(i)]; base_type rhs_item = get_item(rhs, i); - - if (is_overflow) - { - ++lhs_item; - is_overflow = (lhs_item == 0); - } - base_type & res_item = res.items[little(i)]; - res_item = lhs_item + rhs_item; - if (res_item < rhs_item) - is_overflow = true; + res_item += rhs_item; + overflows[i] = res_item < rhs_item; + } + + for (unsigned i = 1; i < item_count; ++i) + { + if (overflows[i-1]) + { + base_type & res_item = res.items[little(i)]; + ++res_item; + if (res_item == 0) + overflows[i] = true; + } } return res; } template - constexpr static auto op_multiply(const integer & lhs, const T & rhs) + constexpr static integer + multiply(const integer & lhs, const T & rhs) { - integer res{}; -#if 1 - integer lhs2 = op_plus(lhs, shift_left(lhs, 1)); - integer lhs3 = op_plus(lhs2, shift_left(lhs, 2)); -#endif - for (unsigned i = 0; i < item_count; ++i) + if constexpr (Bits == 256 && sizeof(base_type) == 8) { - base_type rhs_item = get_item(rhs, i); - unsigned pos = i * base_bits; + /// @sa https://github.com/abseil/abseil-cpp/blob/master/absl/numeric/int128.h + using HalfType = unsigned __int128; - while (rhs_item) + HalfType a01 = (HalfType(lhs.items[little(1)]) << 64) + lhs.items[little(0)]; + HalfType a23 = (HalfType(lhs.items[little(3)]) << 64) + lhs.items[little(2)]; + HalfType a0 = lhs.items[little(0)]; + HalfType a1 = lhs.items[little(1)]; + + HalfType b01 = rhs; + uint64_t b0 = b01; + uint64_t b1 = 0; + HalfType b23 = 0; + if constexpr (sizeof(T) > 8) + b1 = b01 >> 64; + if constexpr (sizeof(T) > 16) + b23 = (HalfType(rhs.items[little(3)]) << 64) + rhs.items[little(2)]; + + HalfType r23 = a23 * b01 + a01 * b23 + a1 * b1; + HalfType r01 = a0 * b0; + HalfType r12 = (r01 >> 64) + (r23 << 64); + HalfType r12_x = a1 * b0; + + integer res; + res.items[little(0)] = r01; + res.items[little(3)] = r23 >> 64; + + if constexpr (sizeof(T) > 8) { -#if 1 /// optimization - if ((rhs_item & 0x7) == 0x7) - { - res = op_plus(res, shift_left(lhs3, pos)); - rhs_item >>= 3; - pos += 3; - continue; - } - - if ((rhs_item & 0x3) == 0x3) - { - res = op_plus(res, shift_left(lhs2, pos)); - rhs_item >>= 2; - pos += 2; - continue; - } -#endif - if (rhs_item & 1) - res = op_plus(res, shift_left(lhs, pos)); - - rhs_item >>= 1; - ++pos; + HalfType r12_y = a0 * b1; + r12_x += r12_y; + if (r12_x < r12_y) + ++res.items[little(3)]; } - } - return res; + r12 += r12_x; + if (r12 < r12_x) + ++res.items[little(3)]; + + res.items[little(1)] = r12; + res.items[little(2)] = r12 >> 64; + return res; + } + else + { + integer res{}; +#if 1 + integer lhs2 = plus(lhs, shift_left(lhs, 1)); + integer lhs3 = plus(lhs2, shift_left(lhs, 2)); +#endif + for (unsigned i = 0; i < item_count; ++i) + { + base_type rhs_item = get_item(rhs, i); + unsigned pos = i * base_bits; + + while (rhs_item) + { +#if 1 /// optimization + if ((rhs_item & 0x7) == 0x7) + { + res = plus(res, shift_left(lhs3, pos)); + rhs_item >>= 3; + pos += 3; + continue; + } + + if ((rhs_item & 0x3) == 0x3) + { + res = plus(res, shift_left(lhs2, pos)); + rhs_item >>= 2; + pos += 2; + continue; + } +#endif + if (rhs_item & 1) + res = plus(res, shift_left(lhs, pos)); + + rhs_item >>= 1; + ++pos; + } + } + + return res; + } } public: @@ -475,7 +539,7 @@ public: constexpr static integer operator_unary_minus(const integer & lhs) noexcept(std::is_same_v) { - 
return op_plus(operator_unary_tilda(lhs), 1); + return plus(operator_unary_tilda(lhs), 1); } template @@ -484,9 +548,9 @@ public: if constexpr (should_keep_size()) { if (is_negative(rhs)) - return op_minus(lhs, -rhs); + return minus(lhs, -rhs); else - return op_plus(lhs, rhs); + return plus(lhs, rhs); } else { @@ -502,9 +566,9 @@ public: if constexpr (should_keep_size()) { if (is_negative(rhs)) - return op_plus(lhs, -rhs); + return plus(lhs, -rhs); else - return op_minus(lhs, rhs); + return minus(lhs, rhs); } else { @@ -523,12 +587,12 @@ public: if constexpr (std::is_signed_v) { - res = op_multiply((is_negative(lhs) ? make_positive(lhs) : lhs), + res = multiply((is_negative(lhs) ? make_positive(lhs) : lhs), (is_negative(rhs) ? make_positive(rhs) : rhs)); } else { - res = op_multiply(lhs, (is_negative(rhs) ? make_positive(rhs) : rhs)); + res = multiply(lhs, (is_negative(rhs) ? make_positive(rhs) : rhs)); } if (std::is_same_v && is_negative(lhs) != is_negative(rhs)) @@ -775,20 +839,20 @@ public: { if (*c >= '0' && *c <= '9') { - res = op_multiply(res, 16U); - res = op_plus(res, *c - '0'); + res = multiply(res, 16U); + res = plus(res, *c - '0'); ++c; } else if (*c >= 'a' && *c <= 'f') { - res = op_multiply(res, 16U); - res = op_plus(res, *c - 'a' + 10U); + res = multiply(res, 16U); + res = plus(res, *c - 'a' + 10U); ++c; } else if (*c >= 'A' && *c <= 'F') { // tolower must be used, but it is not constexpr - res = op_multiply(res, 16U); - res = op_plus(res, *c - 'A' + 10U); + res = multiply(res, 16U); + res = plus(res, *c - 'A' + 10U); ++c; } else @@ -802,8 +866,8 @@ public: if (*c < '0' || *c > '9') throwError("invalid char from"); - res = op_multiply(res, 10U); - res = op_plus(res, *c - '0'); + res = multiply(res, 10U); + res = plus(res, *c - '0'); ++c; } } diff --git a/base/loggers/ExtendedLogChannel.cpp b/base/loggers/ExtendedLogChannel.cpp index 421113b425f..4ab6ad5c96a 100644 --- a/base/loggers/ExtendedLogChannel.cpp +++ b/base/loggers/ExtendedLogChannel.cpp @@ -23,6 +23,7 @@ ExtendedLogMessage ExtendedLogMessage::getFrom(const Poco::Message & base) msg_ext.time_seconds = static_cast(tv.tv_sec); msg_ext.time_microseconds = static_cast(tv.tv_usec); + msg_ext.time_in_microseconds = static_cast((tv.tv_sec) * 1000000U + (tv.tv_usec)); if (current_thread) { diff --git a/base/loggers/ExtendedLogChannel.h b/base/loggers/ExtendedLogChannel.h index 3e9b61e8ae4..771a27159f0 100644 --- a/base/loggers/ExtendedLogChannel.h +++ b/base/loggers/ExtendedLogChannel.h @@ -23,6 +23,7 @@ public: uint32_t time_seconds = 0; uint32_t time_microseconds = 0; + uint64_t time_in_microseconds = 0; uint64_t thread_id = 0; std::string query_id; diff --git a/base/loggers/OwnSplitChannel.cpp b/base/loggers/OwnSplitChannel.cpp index 3ae928864fb..fc167ec3bf6 100644 --- a/base/loggers/OwnSplitChannel.cpp +++ b/base/loggers/OwnSplitChannel.cpp @@ -76,6 +76,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) TextLogElement elem; elem.event_time = msg_ext.time_seconds; + elem.event_time_microseconds = msg_ext.time_in_microseconds; elem.microseconds = msg_ext.time_microseconds; elem.thread_name = getThreadName(); diff --git a/cmake/analysis.cmake b/cmake/analysis.cmake index 0818d608f32..369be295746 100644 --- a/cmake/analysis.cmake +++ b/cmake/analysis.cmake @@ -6,7 +6,7 @@ if (ENABLE_CLANG_TIDY) message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.") endif() - find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8") + find_program (CLANG_TIDY_PATH NAMES 
"clang-tidy" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8") if (CLANG_TIDY_PATH) message(STATUS diff --git a/cmake/find/protobuf.cmake b/cmake/find/protobuf.cmake index a7769206e9f..eb9fbe3edef 100644 --- a/cmake/find/protobuf.cmake +++ b/cmake/find/protobuf.cmake @@ -1,57 +1,62 @@ option(ENABLE_PROTOBUF "Enable protobuf" ${ENABLE_LIBRARIES}) if(NOT ENABLE_PROTOBUF) - if(USE_INTERNAL_PROTOBUF_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf with ENABLE_PROTOBUF=OFF") - endif() - return() + if(USE_INTERNAL_PROTOBUF_LIBRARY) + message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf with ENABLE_PROTOBUF=OFF") + endif() + return() endif() -option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled" ${NOT_UNBUNDLED}) +# Normally we use the internal protobuf library. +# You can set USE_INTERNAL_PROTOBUF_LIBRARY to OFF to force using the external protobuf library, which should be installed in the system in this case. +# The external protobuf library can be installed in the system by running +# sudo apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev +option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled. (Experimental. Set to OFF on your own risk)" ${NOT_UNBUNDLED}) if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/protobuf/cmake/CMakeLists.txt") - if(USE_INTERNAL_PROTOBUF_LIBRARY) - message(WARNING "submodule contrib/protobuf is missing. to fix try run: \n git submodule update --init --recursive") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf") - set(USE_INTERNAL_PROTOBUF_LIBRARY 0) - endif() - set(MISSING_INTERNAL_PROTOBUF_LIBRARY 1) + if(USE_INTERNAL_PROTOBUF_LIBRARY) + message(WARNING "submodule contrib/protobuf is missing. 
to fix try run: \n git submodule update --init --recursive") + message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf") + set(USE_INTERNAL_PROTOBUF_LIBRARY 0) + endif() + set(MISSING_INTERNAL_PROTOBUF_LIBRARY 1) endif() if(NOT USE_INTERNAL_PROTOBUF_LIBRARY) - find_package(Protobuf) - if (Protobuf_LIBRARY AND Protobuf_INCLUDE_DIR AND Protobuf_PROTOC_EXECUTABLE) - set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 1) - set(USE_PROTOBUF 1) - else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system protobuf") - set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 0) - endif() + find_package(Protobuf) + if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY) + message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system protobuf library") + set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 0) + elseif(NOT Protobuf_PROTOC_EXECUTABLE) + message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system protobuf compiler") + set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 0) + else() + set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 1) + set(USE_PROTOBUF 1) + endif() endif() -if (NOT EXTERNAL_PROTOBUF_LIBRARY_FOUND AND NOT MISSING_INTERNAL_PROTOBUF_LIBRARY) - set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") +if(NOT EXTERNAL_PROTOBUF_LIBRARY_FOUND AND NOT MISSING_INTERNAL_PROTOBUF_LIBRARY) + set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") + set(Protobuf_LIBRARY libprotobuf) + set(Protobuf_PROTOC_EXECUTABLE "$") + set(Protobuf_PROTOC_LIBRARY libprotoc) - set(USE_PROTOBUF 1) - set(USE_INTERNAL_PROTOBUF_LIBRARY 1) - set(Protobuf_LIBRARY libprotobuf) - set(Protobuf_PROTOC_LIBRARY libprotoc) - set(Protobuf_LITE_LIBRARY libprotobuf-lite) + include("${ClickHouse_SOURCE_DIR}/contrib/protobuf-cmake/protobuf_generate.cmake") - set(Protobuf_PROTOC_EXECUTABLE "$") + set(USE_INTERNAL_PROTOBUF_LIBRARY 1) + set(USE_PROTOBUF 1) endif() if(OS_FREEBSD AND SANITIZE STREQUAL "address") - # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found - # #include - if(LLVM_INCLUDE_DIRS) - set(Protobuf_INCLUDE_DIR "${Protobuf_INCLUDE_DIR}" ${LLVM_INCLUDE_DIRS}) - else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use protobuf on FreeBSD with address sanitizer without LLVM") - set(USE_PROTOBUF 0) - endif() + # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found + # #include + if(LLVM_INCLUDE_DIRS) + set(Protobuf_INCLUDE_DIR "${Protobuf_INCLUDE_DIR}" ${LLVM_INCLUDE_DIRS}) + else() + message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use protobuf on FreeBSD with address sanitizer without LLVM") + set(USE_PROTOBUF 0) + endif() endif() -include ("${ClickHouse_SOURCE_DIR}/cmake/protobuf_generate_cpp.cmake") - -message(STATUS "Using protobuf=${USE_PROTOBUF}: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY} : ${Protobuf_PROTOC_EXECUTABLE}") +message(STATUS "Using protobuf=${USE_PROTOBUF}: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY} : ${Protobuf_PROTOC_EXECUTABLE} : ${Protobuf_PROTOC_LIBRARY}") diff --git a/cmake/find/rdkafka.cmake b/cmake/find/rdkafka.cmake index d9f815dbcdd..ac11322f408 100644 --- a/cmake/find/rdkafka.cmake +++ b/cmake/find/rdkafka.cmake @@ -14,10 +14,10 @@ if (NOT ENABLE_RDKAFKA) return() endif() -if (NOT ARCH_ARM AND USE_LIBGSASL) +if (NOT ARCH_ARM) option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ${NOT_UNBUNDLED}) elseif(USE_INTERNAL_RDKAFKA_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal librdkafka with ARCH_ARM=${ARCH_ARM} AND 
USE_LIBGSASL=${USE_LIBGSASL}") + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal librdkafka with ARCH_ARM=${ARCH_ARM}") endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cppkafka/CMakeLists.txt") diff --git a/cmake/protobuf_generate_cpp.cmake b/cmake/protobuf_generate_cpp.cmake deleted file mode 100644 index cc2502e5eeb..00000000000 --- a/cmake/protobuf_generate_cpp.cmake +++ /dev/null @@ -1,172 +0,0 @@ -# This file declares functions adding custom commands for generating C++ files from *.proto files: -# function (protobuf_generate_cpp SRCS HDRS) -# function (protobuf_generate_grpc_cpp SRCS HDRS) - -if (NOT USE_PROTOBUF) - message (WARNING "Could not use protobuf_generate_cpp() without the protobuf library") - return() -endif() - -if (NOT DEFINED PROTOBUF_PROTOC_EXECUTABLE) - set (PROTOBUF_PROTOC_EXECUTABLE "$") -endif() - -if (NOT DEFINED GRPC_CPP_PLUGIN_EXECUTABLE) - set (GRPC_CPP_PLUGIN_EXECUTABLE $) -endif() - -if (NOT DEFINED PROTOBUF_GENERATE_CPP_APPEND_PATH) - set (PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE) -endif() - - -function(protobuf_generate_cpp_impl SRCS HDRS MODES OUTPUT_FILE_EXTS PLUGIN) - if(NOT ARGN) - message(SEND_ERROR "Error: protobuf_generate_cpp() called without any proto files") - return() - endif() - - if(PROTOBUF_GENERATE_CPP_APPEND_PATH) - # Create an include path for each file specified - foreach(FIL ${ARGN}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(ABS_PATH ${ABS_FIL} PATH) - list(FIND protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - else() - set(protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) - endif() - - if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) - set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") - endif() - - if(DEFINED Protobuf_IMPORT_DIRS) - foreach(DIR ${Protobuf_IMPORT_DIRS}) - get_filename_component(ABS_PATH ${DIR} ABSOLUTE) - list(FIND protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - endif() - - set (intermediate_dir ${CMAKE_CURRENT_BINARY_DIR}/intermediate) - file (MAKE_DIRECTORY ${intermediate_dir}) - - set (protoc_args) - foreach (mode ${MODES}) - list (APPEND protoc_args "--${mode}_out" ${intermediate_dir}) - endforeach() - if (PLUGIN) - list (APPEND protoc_args "--plugin=${PLUGIN}") - endif() - - set(srcs) - set(hdrs) - set(all_intermediate_outputs) - - foreach(input_name ${ARGN}) - get_filename_component(abs_name ${input_name} ABSOLUTE) - get_filename_component(name ${input_name} NAME_WE) - - set (intermediate_outputs) - foreach (ext ${OUTPUT_FILE_EXTS}) - set (filename "${name}${ext}") - set (output "${CMAKE_CURRENT_BINARY_DIR}/${filename}") - set (intermediate_output "${intermediate_dir}/${filename}") - list (APPEND intermediate_outputs "${intermediate_output}") - list (APPEND all_intermediate_outputs "${intermediate_output}") - - if (${ext} MATCHES ".*\\.h") - list(APPEND hdrs "${output}") - else() - list(APPEND srcs "${output}") - endif() - - add_custom_command( - OUTPUT ${output} - COMMAND ${CMAKE_COMMAND} -DPROTOBUF_GENERATE_CPP_SCRIPT_MODE=1 -DUSE_PROTOBUF=1 -DDIR=${CMAKE_CURRENT_BINARY_DIR} -DFILENAME=${filename} -DCOMPILER_ID=${CMAKE_CXX_COMPILER_ID} -P ${ClickHouse_SOURCE_DIR}/cmake/protobuf_generate_cpp.cmake - DEPENDS ${intermediate_output}) - endforeach() - - add_custom_command( - OUTPUT ${intermediate_outputs} - 
COMMAND ${Protobuf_PROTOC_EXECUTABLE} - ARGS ${protobuf_include_path} ${protoc_args} ${abs_name} - DEPENDS ${abs_name} ${Protobuf_PROTOC_EXECUTABLE} ${PLUGIN} - COMMENT "Running C++ protocol buffer compiler on ${name}" - VERBATIM ) - endforeach() - - set_source_files_properties(${srcs} ${hdrs} ${all_intermediate_outputs} PROPERTIES GENERATED TRUE) - set(${SRCS} ${srcs} PARENT_SCOPE) - set(${HDRS} ${hdrs} PARENT_SCOPE) -endfunction() - - -if (PROTOBUF_GENERATE_CPP_SCRIPT_MODE) - set (output "${DIR}/${FILENAME}") - set (intermediate_dir ${DIR}/intermediate) - set (intermediate_output "${intermediate_dir}/${FILENAME}") - - if (COMPILER_ID MATCHES "Clang") - set (pragma_push "#pragma clang diagnostic push\n") - set (pragma_pop "#pragma clang diagnostic pop\n") - set (pragma_disable_warnings "#pragma clang diagnostic ignored \"-Weverything\"\n") - elseif (COMPILER_ID MATCHES "GNU") - set (pragma_push "#pragma GCC diagnostic push\n") - set (pragma_pop "#pragma GCC diagnostic pop\n") - set (pragma_disable_warnings "#pragma GCC diagnostic ignored \"-Wall\"\n" - "#pragma GCC diagnostic ignored \"-Wextra\"\n" - "#pragma GCC diagnostic ignored \"-Warray-bounds\"\n" - "#pragma GCC diagnostic ignored \"-Wold-style-cast\"\n" - "#pragma GCC diagnostic ignored \"-Wshadow\"\n" - "#pragma GCC diagnostic ignored \"-Wsuggest-override\"\n" - "#pragma GCC diagnostic ignored \"-Wcast-qual\"\n" - "#pragma GCC diagnostic ignored \"-Wunused-parameter\"\n") - endif() - - if (${FILENAME} MATCHES ".*\\.h") - file(WRITE "${output}" - "#pragma once\n" - ${pragma_push} - ${pragma_disable_warnings} - "#include \"${intermediate_output}\"\n" - ${pragma_pop} - ) - else() - file(WRITE "${output}" - ${pragma_disable_warnings} - "#include \"${intermediate_output}\"\n" - ) - endif() - return() -endif() - - -function(protobuf_generate_cpp SRCS HDRS) - set (modes cpp) - set (output_file_exts ".pb.cc" ".pb.h") - set (plugin) - - protobuf_generate_cpp_impl(srcs hdrs "${modes}" "${output_file_exts}" "${plugin}" ${ARGN}) - - set(${SRCS} ${srcs} PARENT_SCOPE) - set(${HDRS} ${hdrs} PARENT_SCOPE) -endfunction() - - -function(protobuf_generate_grpc_cpp SRCS HDRS) - set (modes cpp grpc) - set (output_file_exts ".pb.cc" ".pb.h" ".grpc.pb.cc" ".grpc.pb.h") - set (plugin "protoc-gen-grpc=${GRPC_CPP_PLUGIN_EXECUTABLE}") - - protobuf_generate_cpp_impl(srcs hdrs "${modes}" "${output_file_exts}" "${plugin}" ${ARGN}) - - set(${SRCS} ${srcs} PARENT_SCOPE) - set(${HDRS} ${hdrs} PARENT_SCOPE) -endfunction() diff --git a/contrib/cyrus-sasl b/contrib/cyrus-sasl index 6054630889f..9995bf9d8e1 160000 --- a/contrib/cyrus-sasl +++ b/contrib/cyrus-sasl @@ -1 +1 @@ -Subproject commit 6054630889fd1cd8d0659573d69badcee1e23a00 +Subproject commit 9995bf9d8e14f58934d9313ac64f13780d6dd3c9 diff --git a/contrib/poco b/contrib/poco index 297fc905e16..757d947235b 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 297fc905e166392156f83b96aaa5f44e8a6a35c4 +Subproject commit 757d947235b307675cff964f29b19d388140a9eb diff --git a/contrib/protobuf-cmake/protobuf_generate.cmake b/contrib/protobuf-cmake/protobuf_generate.cmake new file mode 100644 index 00000000000..fc1dfd9cc11 --- /dev/null +++ b/contrib/protobuf-cmake/protobuf_generate.cmake @@ -0,0 +1,198 @@ +# The code in this file was copied from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake + +#[[ +Add custom commands to process ``.proto`` files to C++:: + +protobuf_generate_cpp ( + [DESCRIPTORS ] [EXPORT_MACRO ] [...]) + +``SRCS`` + Variable to define with 
autogenerated source files +``HDRS`` + Variable to define with autogenerated header files +``DESCRIPTORS`` + Variable to define with autogenerated descriptor files, if requested. +``EXPORT_MACRO`` + is a macro which should expand to ``__declspec(dllexport)`` or + ``__declspec(dllimport)`` depending on what is being compiled. +``ARGN`` + ``.proto`` files +#]] + +function(PROTOBUF_GENERATE_CPP SRCS HDRS) + cmake_parse_arguments(protobuf_generate_cpp "" "EXPORT_MACRO;DESCRIPTORS" "" ${ARGN}) + + set(_proto_files "${protobuf_generate_cpp_UNPARSED_ARGUMENTS}") + if(NOT _proto_files) + message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files") + return() + endif() + + if(PROTOBUF_GENERATE_CPP_APPEND_PATH) + set(_append_arg APPEND_PATH) + endif() + + if(protobuf_generate_cpp_DESCRIPTORS) + set(_descriptors DESCRIPTORS) + endif() + + if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) + set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") + endif() + + if(DEFINED Protobuf_IMPORT_DIRS) + set(_import_arg IMPORT_DIRS ${Protobuf_IMPORT_DIRS}) + endif() + + set(_outvar) + protobuf_generate(${_append_arg} ${_descriptors} LANGUAGE cpp EXPORT_MACRO ${protobuf_generate_cpp_EXPORT_MACRO} OUT_VAR _outvar ${_import_arg} PROTOS ${_proto_files}) + + set(${SRCS}) + set(${HDRS}) + if(protobuf_generate_cpp_DESCRIPTORS) + set(${protobuf_generate_cpp_DESCRIPTORS}) + endif() + + foreach(_file ${_outvar}) + if(_file MATCHES "cc$") + list(APPEND ${SRCS} ${_file}) + elseif(_file MATCHES "desc$") + list(APPEND ${protobuf_generate_cpp_DESCRIPTORS} ${_file}) + else() + list(APPEND ${HDRS} ${_file}) + endif() + endforeach() + set(${SRCS} ${${SRCS}} PARENT_SCOPE) + set(${HDRS} ${${HDRS}} PARENT_SCOPE) + if(protobuf_generate_cpp_DESCRIPTORS) + set(${protobuf_generate_cpp_DESCRIPTORS} "${${protobuf_generate_cpp_DESCRIPTORS}}" PARENT_SCOPE) + endif() +endfunction() + +# By default have PROTOBUF_GENERATE_CPP macro pass -I to protoc +# for each directory where a proto file is referenced. 
+if(NOT DEFINED PROTOBUF_GENERATE_CPP_APPEND_PATH) + set(PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE) +endif() + +function(protobuf_generate) + set(_options APPEND_PATH DESCRIPTORS) + set(_singleargs LANGUAGE OUT_VAR EXPORT_MACRO PROTOC_OUT_DIR) + if(COMMAND target_sources) + list(APPEND _singleargs TARGET) + endif() + set(_multiargs PROTOS IMPORT_DIRS GENERATE_EXTENSIONS) + + cmake_parse_arguments(protobuf_generate "${_options}" "${_singleargs}" "${_multiargs}" "${ARGN}") + + if(NOT protobuf_generate_PROTOS AND NOT protobuf_generate_TARGET) + message(SEND_ERROR "Error: protobuf_generate called without any targets or source files") + return() + endif() + + if(NOT protobuf_generate_OUT_VAR AND NOT protobuf_generate_TARGET) + message(SEND_ERROR "Error: protobuf_generate called without a target or output variable") + return() + endif() + + if(NOT protobuf_generate_LANGUAGE) + set(protobuf_generate_LANGUAGE cpp) + endif() + string(TOLOWER ${protobuf_generate_LANGUAGE} protobuf_generate_LANGUAGE) + + if(NOT protobuf_generate_PROTOC_OUT_DIR) + set(protobuf_generate_PROTOC_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) + endif() + + if(protobuf_generate_EXPORT_MACRO AND protobuf_generate_LANGUAGE STREQUAL cpp) + set(_dll_export_decl "dllexport_decl=${protobuf_generate_EXPORT_MACRO}:") + endif() + + if(NOT protobuf_generate_GENERATE_EXTENSIONS) + if(protobuf_generate_LANGUAGE STREQUAL cpp) + set(protobuf_generate_GENERATE_EXTENSIONS .pb.h .pb.cc) + elseif(protobuf_generate_LANGUAGE STREQUAL python) + set(protobuf_generate_GENERATE_EXTENSIONS _pb2.py) + else() + message(SEND_ERROR "Error: protobuf_generate given unknown Language ${LANGUAGE}, please provide a value for GENERATE_EXTENSIONS") + return() + endif() + endif() + + if(protobuf_generate_TARGET) + get_target_property(_source_list ${protobuf_generate_TARGET} SOURCES) + foreach(_file ${_source_list}) + if(_file MATCHES "proto$") + list(APPEND protobuf_generate_PROTOS ${_file}) + endif() + endforeach() + endif() + + if(NOT protobuf_generate_PROTOS) + message(SEND_ERROR "Error: protobuf_generate could not find any .proto files") + return() + endif() + + if(protobuf_generate_APPEND_PATH) + # Create an include path for each file specified + foreach(_file ${protobuf_generate_PROTOS}) + get_filename_component(_abs_file ${_file} ABSOLUTE) + get_filename_component(_abs_path ${_abs_file} PATH) + list(FIND _protobuf_include_path ${_abs_path} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${_abs_path}) + endif() + endforeach() + else() + set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) + endif() + + foreach(DIR ${protobuf_generate_IMPORT_DIRS}) + get_filename_component(ABS_PATH ${DIR} ABSOLUTE) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) + endif() + endforeach() + + set(_generated_srcs_all) + foreach(_proto ${protobuf_generate_PROTOS}) + get_filename_component(_abs_file ${_proto} ABSOLUTE) + get_filename_component(_abs_dir ${_abs_file} DIRECTORY) + get_filename_component(_basename ${_proto} NAME_WE) + file(RELATIVE_PATH _rel_dir ${CMAKE_CURRENT_SOURCE_DIR} ${_abs_dir}) + + set(_possible_rel_dir) + if (NOT protobuf_generate_APPEND_PATH) + set(_possible_rel_dir ${_rel_dir}/) + endif() + + set(_generated_srcs) + foreach(_ext ${protobuf_generate_GENERATE_EXTENSIONS}) + list(APPEND _generated_srcs "${protobuf_generate_PROTOC_OUT_DIR}/${_possible_rel_dir}${_basename}${_ext}") + endforeach() + + 
if(protobuf_generate_DESCRIPTORS AND protobuf_generate_LANGUAGE STREQUAL cpp) + set(_descriptor_file "${CMAKE_CURRENT_BINARY_DIR}/${_basename}.desc") + set(_dll_desc_out "--descriptor_set_out=${_descriptor_file}") + list(APPEND _generated_srcs ${_descriptor_file}) + endif() + list(APPEND _generated_srcs_all ${_generated_srcs}) + + add_custom_command( + OUTPUT ${_generated_srcs} + COMMAND protobuf::protoc + ARGS --${protobuf_generate_LANGUAGE}_out ${_dll_export_decl}${protobuf_generate_PROTOC_OUT_DIR} ${_dll_desc_out} ${_protobuf_include_path} ${_abs_file} + DEPENDS ${_abs_file} protobuf::protoc + COMMENT "Running ${protobuf_generate_LANGUAGE} protocol buffer compiler on ${_proto}" + VERBATIM ) + endforeach() + + set_source_files_properties(${_generated_srcs_all} PROPERTIES GENERATED TRUE) + if(protobuf_generate_OUT_VAR) + set(${protobuf_generate_OUT_VAR} ${_generated_srcs_all} PARENT_SCOPE) + endif() + if(protobuf_generate_TARGET) + target_sources(${protobuf_generate_TARGET} PRIVATE ${_generated_srcs_all}) + endif() +endfunction() diff --git a/docker/builder/build.sh b/docker/builder/build.sh index 983c25fdec4..d814bcdf2b4 100755 --- a/docker/builder/build.sh +++ b/docker/builder/build.sh @@ -1,9 +1,10 @@ #!/usr/bin/env bash +set -e #ccache -s # uncomment to display CCache statistics mkdir -p /server/build_docker cd /server/build_docker -cmake -G Ninja /server -DCMAKE_C_COMPILER=`which gcc-9` -DCMAKE_CXX_COMPILER=`which g++-9` +cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v gcc-9)" "-DCMAKE_CXX_COMPILER=$(command -v g++-9)" # Set the number of build jobs to the half of number of virtual CPU cores (rounded up). # By default, ninja use all virtual CPU cores, that leads to very high memory consumption without much improvement in build time. diff --git a/docker/images.json b/docker/images.json index 8c2cb35b004..e9e91864e1e 100644 --- a/docker/images.json +++ b/docker/images.json @@ -133,6 +133,10 @@ "name": "yandex/clickhouse-postgresql-java-client", "dependent": [] }, + "docker/test/integration/kerberos_kdc": { + "name": "yandex/clickhouse-kerberos-kdc", + "dependent": [] + }, "docker/test/base": { "name": "yandex/clickhouse-test-base", "dependent": [ diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index fd70b03242b..5df0392cb4d 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -17,7 +17,8 @@ ccache --show-stats ||: ccache --zero-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: rm -f CMakeCache.txt -cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER -DENABLE_CHECK_HEAVY_BUILDS=1 $CMAKE_FLAGS .. +cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "$CMAKE_FLAGS" .. +# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. 
ninja $NINJA_FLAGS clickhouse-bundle mv ./programs/clickhouse* /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh index fbaa0151c6b..a8072c5e78a 100755 --- a/docker/packager/deb/build.sh +++ b/docker/packager/deb/build.sh @@ -4,16 +4,16 @@ set -x -e ccache --show-stats ||: ccache --zero-stats ||: -build/release --no-pbuilder $ALIEN_PKGS | ts '%Y-%m-%d %H:%M:%S' +build/release --no-pbuilder "$ALIEN_PKGS" | ts '%Y-%m-%d %H:%M:%S' mv /*.deb /output -mv *.changes /output -mv *.buildinfo /output +mv -- *.changes /output +mv -- *.buildinfo /output mv /*.rpm /output ||: # if exists mv /*.tgz /output ||: # if exists if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_OUTPUT" = "tests" ] ;} then - echo Place $BINARY_OUTPUT to output + echo "Place $BINARY_OUTPUT to output" mkdir /output/binary ||: # if exists mv /build/obj-*/programs/clickhouse* /output/binary if [ "$BINARY_OUTPUT" = "tests" ] diff --git a/docker/packager/unbundled/build.sh b/docker/packager/unbundled/build.sh index ca1217ac522..c94bd54d001 100755 --- a/docker/packager/unbundled/build.sh +++ b/docker/packager/unbundled/build.sh @@ -4,10 +4,10 @@ set -x -e ccache --show-stats ||: ccache --zero-stats ||: -build/release --no-pbuilder $ALIEN_PKGS | ts '%Y-%m-%d %H:%M:%S' +build/release --no-pbuilder "$ALIEN_PKGS" | ts '%Y-%m-%d %H:%M:%S' mv /*.deb /output -mv *.changes /output -mv *.buildinfo /output +mv -- *.changes /output +mv -- *.buildinfo /output mv /*.rpm /output ||: # if exists mv /*.tgz /output ||: # if exists diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index aa3f1d738c2..506e32c18b3 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -48,10 +48,15 @@ RUN apt-get update \ tzdata \ --yes --no-install-recommends -# Sanitizer options +# Sanitizer options for services (clickhouse-server) RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment; \ echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \ echo "MSAN_OPTIONS='abort_on_error=1'" >> /etc/environment; \ ln -s /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; +# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") +# (but w/o verbosity for TSAN, otherwise test.reference will not match) +ENV TSAN_OPTIONS='halt_on_error=1 history_size=7' +ENV UBSAN_OPTIONS='print_stacktrace=1' +ENV MSAN_OPTIONS='abort_on_error=1' CMD sleep 1 diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index a277ddf9d36..5e586402bd7 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -15,29 +15,59 @@ stage=${stage:-} # empty parameter. read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}" +FASTTEST_WORKSPACE=$(readlink -f "${FASTTEST_WORKSPACE:-.}") +FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}") +FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}") +FASTTEST_DATA=$(readlink -f "${FASTTEST_DATA:-$FASTTEST_WORKSPACE/db-fasttest}") +FASTTEST_OUTPUT=$(readlink -f "${FASTTEST_OUTPUT:-$FASTTEST_WORKSPACE}") -function kill_clickhouse +# Export these variables, so that all subsequent invocations of the script +# use them, and not try to guess them anew, which leads to weird effects. 
+export FASTTEST_WORKSPACE +export FASTTEST_SOURCE +export FASTTEST_BUILD +export FASTTEST_DATA +export FASTTEST_OUT + +server_pid=none + +function stop_server { for _ in {1..60} do - if ! pkill -f clickhouse-server ; then break ; fi + if ! pkill -f "clickhouse-server" && ! kill -- "$server_pid" ; then break ; fi sleep 1 done - if pgrep -f clickhouse-server + if kill -0 -- "$server_pid" then pstree -apgT jobs - echo "Failed to kill the ClickHouse server $(pgrep -f clickhouse-server)" + echo "Failed to kill the ClickHouse server pid '$server_pid'" return 1 fi + + server_pid=none } -function wait_for_server_start +function start_server { + set -m # Spawn server in its own process groups + clickhouse-server --config-file="$FASTTEST_DATA/config.xml" -- --path "$FASTTEST_DATA" --user_files_path "$FASTTEST_DATA/user_files" &>> "$FASTTEST_OUTPUT/server.log" & + server_pid=$! + set +m + + if [ "$server_pid" == "0" ] + then + echo "Failed to start ClickHouse server" + # Avoid zero PID because `kill` treats it as our process group PID. + server_pid="none" + return 1 + fi + for _ in {1..60} do - if clickhouse-client --query "select 1" || ! pgrep -f clickhouse-server + if clickhouse-client --query "select 1" || ! kill -0 -- "$server_pid" then break fi @@ -47,20 +77,26 @@ function wait_for_server_start if ! clickhouse-client --query "select 1" then echo "Failed to wait until ClickHouse server starts." + server_pid="none" return 1 fi - echo "ClickHouse server pid '$(pgrep -f clickhouse-server)' started and responded" + if ! kill -0 -- "$server_pid" + then + echo "Wrong clickhouse server started: PID '$server_pid' we started is not running, but '$(pgrep -f clickhouse-server)' is running" + server_pid="none" + return 1 + fi + + echo "ClickHouse server pid '$server_pid' started and responded" } function clone_root { -git clone https://github.com/ClickHouse/ClickHouse.git | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/clone_log.txt -cd ClickHouse -CLICKHOUSE_DIR=$(pwd) -export CLICKHOUSE_DIR - +git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt" +( +cd "$FASTTEST_SOURCE" if [ "$PULL_REQUEST_NUMBER" != "0" ]; then if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then git checkout FETCH_HEAD @@ -71,22 +107,36 @@ if [ "$PULL_REQUEST_NUMBER" != "0" ]; then echo 'Checked out to commit' fi else - if [ "$COMMIT_SHA" != "" ]; then + if [ -v COMMIT_SHA ]; then git checkout "$COMMIT_SHA" fi fi +) } -function run +function clone_submodules { +( +cd "$FASTTEST_SOURCE" + SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11) -git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/submodule_log.txt +git submodule sync +git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}" +git submodule foreach git reset --hard +git submodule foreach git checkout @ -f +git submodule foreach git clean -xfd +) +} -CMAKE_LIBS_CONFIG=(-DENABLE_LIBRARIES=0 -DENABLE_TESTS=0 -DENABLE_UTILS=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_THINLTO=0 -DUSE_UNWIND=1) +function run_cmake +{ +CMAKE_LIBS_CONFIG=("-DENABLE_LIBRARIES=0" "-DENABLE_TESTS=0" "-DENABLE_UTILS=0" "-DENABLE_EMBEDDED_COMPILER=0" 
"-DENABLE_THINLTO=0" "-DUSE_UNWIND=1") -export CCACHE_DIR=/ccache -export CCACHE_BASEDIR=/ClickHouse +# TODO remove this? we don't use ccache anyway. An option would be to download it +# from S3 simultaneously with cloning. +export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache" +export CCACHE_BASEDIR="$FASTTEST_SOURCE" export CCACHE_NOHASHDIR=true export CCACHE_COMPILERCHECK=content export CCACHE_MAXSIZE=15G @@ -94,34 +144,45 @@ export CCACHE_MAXSIZE=15G ccache --show-stats ||: ccache --zero-stats ||: -mkdir build -cd build -cmake .. -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/cmake_log.txt -time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/build_log.txt -ninja install | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/install_log.txt +mkdir "$FASTTEST_BUILD" ||: +( +cd "$FASTTEST_BUILD" +cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt" +) +} +function build +{ +( +cd "$FASTTEST_BUILD" +time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" ccache --show-stats ||: +) +} -mkdir -p /etc/clickhouse-server -mkdir -p /etc/clickhouse-client -mkdir -p /etc/clickhouse-server/config.d -mkdir -p /etc/clickhouse-server/users.d -ln -s /test_output /var/log/clickhouse-server -cp "$CLICKHOUSE_DIR/programs/server/config.xml" /etc/clickhouse-server/ -cp "$CLICKHOUSE_DIR/programs/server/users.xml" /etc/clickhouse-server/ +function configure +{ +clickhouse-client --version +clickhouse-test --help -# install tests config -$CLICKHOUSE_DIR/tests/config/install.sh +mkdir -p "$FASTTEST_DATA"{,/client-config} +cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA" +cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA" +"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config" # doesn't support SSL -rm -f /etc/clickhouse-server/config.d/secure_ports.xml +rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" +} + +function run_tests +{ +clickhouse-server --version +clickhouse-test --help # Kill the server in case we are running locally and not in docker -kill_clickhouse +stop_server ||: -clickhouse-server --config /etc/clickhouse-server/config.xml --daemon - -wait_for_server_start +start_server TESTS_TO_SKIP=( parquet @@ -191,11 +252,10 @@ TESTS_TO_SKIP=( 01460_DistributedFilesToInsert ) -time clickhouse-test -j 8 --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/test_log.txt - +time clickhouse-test -j 8 --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" # substr is to remove semicolon after test name -readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' /test_output/test_log.txt | tee /test_output/failed-parallel-tests.txt) +readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt") # We will rerun sequentially any tests that have failed during parallel run. 
# They might have failed because there was some interference from other tests @@ -206,19 +266,16 @@ readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, le # explicit instead of guessing. if [[ -n "${FAILED_TESTS[*]}" ]] then - kill_clickhouse + stop_server ||: # Clean the data so that there is no interference from the previous test run. - rm -rf /var/lib/clickhouse ||: - mkdir /var/lib/clickhouse + rm -rf "$FASTTEST_DATA"/{meta,}data ||: - clickhouse-server --config /etc/clickhouse-server/config.xml --daemon - - wait_for_server_start + start_server echo "Going to run again: ${FAILED_TESTS[*]}" - clickhouse-test --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a /test_output/test_log.txt + clickhouse-test --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt" else echo "No failed tests" fi @@ -228,20 +285,50 @@ case "$stage" in "") ls -la ;& - "clone_root") clone_root # Pass control to the script from cloned sources, unless asked otherwise. if ! [ -v FASTTEST_LOCAL_SCRIPT ] then - stage=run "$CLICKHOUSE_DIR/docker/test/fasttest/run.sh" + # 'run' stage is deprecated, used for compatibility with old scripts. + # Replace with 'clone_submodules' after Nov 1, 2020. + # cd and CLICKHOUSE_DIR are also a setup for old scripts, remove as well. + # In modern script we undo it by changing back into workspace dir right + # away, see below. Remove that as well. + cd "$FASTTEST_SOURCE" + CLICKHOUSE_DIR=$(pwd) + export CLICKHOUSE_DIR + stage=run "$FASTTEST_SOURCE/docker/test/fasttest/run.sh" exit $? fi ;& - "run") - run + # A deprecated stage that is called by old script and equivalent to everything + # after cloning root, starting with cloning submodules. + ;& +"clone_submodules") + # Recover after being called from the old script that changes into source directory. + # See the compatibility hacks in `clone_root` stage above. Remove at the same time, + # after Nov 1, 2020. + cd "$FASTTEST_WORKSPACE" + clone_submodules | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/submodule_log.txt" + ;& +"run_cmake") + run_cmake + ;& +"build") + build + PATH="$FASTTEST_BUILD/programs:$FASTTEST_SOURCE/tests:$PATH" + export PATH + ;& +"configure") + # The `install_log.txt` is also needed for compatibility with old CI task -- + # if there is no log, it will decide that build failed. + configure | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt" + ;& +"run_tests") + run_tests ;& esac diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index d35a13cc421..6429c1bc19c 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -58,7 +58,7 @@ function watchdog echo "Fuzzing run has timed out" killall clickhouse-client ||: - for x in {1..10} + for _ in {1..10} do if ! pgrep -f clickhouse-client then @@ -81,6 +81,9 @@ function fuzz echo Server started fuzzer_exit_code=0 + # SC2012: Use find instead of ls to better handle non-alphanumeric filenames. + # They are all alphanumeric. 
+ # shellcheck disable=SC2012 ./clickhouse-client --query-fuzzer-runs=1000 \ < <(for f in $(ls ch/tests/queries/0_stateless/*.sql | sort -R); do cat "$f"; echo ';'; done) \ > >(tail -10000 > fuzzer.log) \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 35decd907c0..3e4e88965e0 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -16,7 +16,8 @@ RUN apt-get update \ odbc-postgresql \ sqlite3 \ curl \ - tar + tar \ + krb5-user RUN rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ diff --git a/docker/test/integration/kerberos_kdc/Dockerfile b/docker/test/integration/kerberos_kdc/Dockerfile new file mode 100644 index 00000000000..ea231b1191d --- /dev/null +++ b/docker/test/integration/kerberos_kdc/Dockerfile @@ -0,0 +1,15 @@ +# docker build -t yandex/clickhouse-kerberos-kdc . + +FROM centos:6.6 +# old OS to make is faster and smaller + +RUN yum install -y krb5-server krb5-libs krb5-auth-dialog krb5-workstation + +EXPOSE 88 749 + +RUN touch /config.sh +# should be overwritten e.g. via docker_compose volumes +# volumes: /some_path/my_kerberos_config.sh:/config.sh:ro + + +ENTRYPOINT ["/bin/bash", "/config.sh"] diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml new file mode 100644 index 00000000000..3ce0000b148 --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml @@ -0,0 +1,59 @@ +version: '2.3' + +services: + kafka_kerberized_zookeeper: + image: confluentinc/cp-zookeeper:5.2.0 + # restart: always + hostname: kafka_kerberized_zookeeper + environment: + ZOOKEEPER_SERVER_ID: 1 + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888" + KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true" + volumes: + - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets + - /dev/urandom:/dev/random + depends_on: + - kafka_kerberos + security_opt: + - label:disable + + kerberized_kafka1: + image: confluentinc/cp-kafka:5.2.0 + # restart: always + hostname: kerberized_kafka1 + ports: + - "9092:9092" + - "9093:9093" + environment: + KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093 + KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093 + # KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092 + # KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092 + KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI + KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI + KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT, + KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181" + KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true" + 
volumes: + - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets + - /dev/urandom:/dev/random + depends_on: + - kafka_kerberized_zookeeper + - kafka_kerberos + security_opt: + - label:disable + + kafka_kerberos: + image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG} + hostname: kafka_kerberos + volumes: + - ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab + - ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh + - /dev/urandom:/dev/random + ports: [88, 749] diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index c38260279ed..c0255d3d706 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -7,7 +7,7 @@ set +e reties=0 while true; do docker info &>/dev/null && break - reties=$[$reties+1] + reties=$((reties+1)) if [[ $reties -ge 100 ]]; then # 10 sec max echo "Can't start docker daemon, timeout exceeded." >&2 exit 1; @@ -27,6 +27,7 @@ export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest} export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest} export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest} export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest} +export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest} cd /ClickHouse/tests/integration exec "$@" diff --git a/docker/test/performance-comparison/compare-releases.sh b/docker/test/performance-comparison/compare-releases.sh index a93a768f36b..456661362b3 100755 --- a/docker/test/performance-comparison/compare-releases.sh +++ b/docker/test/performance-comparison/compare-releases.sh @@ -9,7 +9,7 @@ right_version=${2} if [ "$left_version" == "" ] || [ "$right_version" == "" ] then - >&2 echo Usage: $(basename "$0") left_version right_version + >&2 echo "Usage: $(basename "$0") left_version right_version" exit 1 fi diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index ddcc303da0d..74f1e81e8f3 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -181,6 +181,9 @@ function run_tests # Randomize test order. test_files=$(for f in $test_files; do echo "$f"; done | sort -R) + # Limit profiling time to 10 minutes, not to run for too long. + profile_seconds_left=600 + # Run the tests. test_name="" for test in $test_files @@ -194,15 +197,24 @@ function run_tests test_name=$(basename "$test" ".xml") echo test "$test_name" + # Don't profile if we're past the time limit. + # Use awk because bash doesn't support floating point arithmetics. + profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }") + TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n") # The grep is to filter out set -x output and keep only time output. # The '2>&1 >/dev/null' redirects stderr to stdout, and discards stdout. { \ time "$script_dir/perf.py" --host localhost localhost --port 9001 9002 \ --runs "$CHPC_RUNS" --max-queries "$CHPC_MAX_QUERIES" \ + --profile-seconds "$profile_seconds" \ -- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; \ } 2>&1 >/dev/null | tee >(grep -v ^+ >> "wall-clock-times.tsv") \ || echo "Test $test_name failed with error code $?" 
>> "$test_name-err.log" + + profile_seconds_left=$(awk -F' ' \ + 'BEGIN { s = '$profile_seconds_left'; } /^profile-total/ { s -= $2 } END { print s }' \ + "$test_name-raw.tsv") done unset TIMEFORMAT @@ -294,6 +306,7 @@ for test_file in *-raw.tsv do test_name=$(basename "$test_file" "-raw.tsv") sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv" + sed -n "s/^profile\t/$test_name\t/p" < "$test_file" >> "analyze/query-profiles.tsv" sed -n "s/^client-time\t/$test_name\t/p" < "$test_file" >> "analyze/client-times.tsv" sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv" sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv" @@ -658,13 +671,15 @@ create view test_runs as group by test ; -create table test_times_report engine File(TSV, 'report/test-times.tsv') as - select wall_clock_time_per_test.test, real, - toDecimal64(total_client_time, 3), +create view test_times_view as + select + wall_clock_time_per_test.test test, + real, + total_client_time, queries, - toDecimal64(query_max, 3), - toDecimal64(real / queries, 3) avg_real_per_query, - toDecimal64(query_min, 3), + query_max, + real / queries avg_real_per_query, + query_min, runs from test_time -- wall clock times are also measured for skipped tests, so don't @@ -673,7 +688,43 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') as on wall_clock_time_per_test.test = test_time.test full join test_runs on test_runs.test = test_time.test - order by avg_real_per_query desc; + ; + +-- WITH TOTALS doesn't work with INSERT SELECT, so we have to jump through these +-- hoops: https://github.com/ClickHouse/ClickHouse/issues/15227 +create view test_times_view_total as + select + 'Total' test, + sum(real), + sum(total_client_time), + sum(queries), + max(query_max), + sum(real) / sum(queries) avg_real_per_query, + min(query_min), + -- Totaling the number of runs doesn't make sense, but use the max so + -- that the reporting script doesn't complain about queries being too + -- long. + max(runs) + from test_times_view + ; + +create table test_times_report engine File(TSV, 'report/test-times.tsv') as + select + test, + toDecimal64(real, 3), + toDecimal64(total_client_time, 3), + queries, + toDecimal64(query_max, 3), + toDecimal64(avg_real_per_query, 3), + toDecimal64(query_min, 3), + runs + from ( + select * from test_times_view + union all + select * from test_times_view_total + ) + order by test = 'Total' desc, avg_real_per_query desc + ; -- report for all queries page, only main metric create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as @@ -694,13 +745,12 @@ create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as test, query_index, query_display_name from queries order by test, query_index; --- queries for which we will build flamegraphs (see below) -create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, - 'report/queries-for-flamegraph.tsv') as - select test, query_index from queries where unstable_show or changed_show - ; - +-- Report of queries that have inconsistent 'short' markings: +-- 1) have short duration, but are not marked as 'short' +-- 2) the reverse -- marked 'short' but take too long. +-- The threshold for 2) is significantly larger than the threshold for 1), to +-- avoid jitter. 
create view shortness as select (test, query_index) in @@ -718,11 +768,6 @@ create view shortness and times.query_index = query_display_names.query_index ; --- Report of queries that have inconsistent 'short' markings: --- 1) have short duration, but are not marked as 'short' --- 2) the reverse -- marked 'short' but take too long. --- The threshold for 2) is significantly larger than the threshold for 1), to --- avoid jitter. create table inconsistent_short_marking_report engine File(TSV, 'report/unexpected-query-duration.tsv') as select @@ -759,18 +804,15 @@ create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.ts " 2> >(tee -a report/errors.log 1>&2) -# Prepare source data for metrics and flamegraphs for unstable queries. +# Prepare source data for metrics and flamegraphs for queries that were profiled +# by perf.py. for version in {right,left} do rm -rf data clickhouse-local --query " -create view queries_for_flamegraph as - select * from file('report/queries-for-flamegraph.tsv', TSVWithNamesAndTypes, - 'test text, query_index int'); - -create view query_runs as +create view query_profiles as with 0 as left, 1 as right - select * from file('analyze/query-runs.tsv', TSV, + select * from file('analyze/query-profiles.tsv', TSV, 'test text, query_index int, query_id text, version UInt8, time float') where version = $version ; @@ -782,15 +824,12 @@ create view query_display_names as select * from create table unstable_query_runs engine File(TSVWithNamesAndTypes, 'unstable-query-runs.$version.rep') as - select query_runs.test test, query_runs.query_index query_index, + select query_profiles.test test, query_profiles.query_index query_index, query_display_name, query_id - from query_runs - join queries_for_flamegraph on - query_runs.test = queries_for_flamegraph.test - and query_runs.query_index = queries_for_flamegraph.query_index + from query_profiles left join query_display_names on - query_runs.test = query_display_names.test - and query_runs.query_index = query_display_names.query_index + query_profiles.test = query_display_names.test + and query_profiles.query_index = query_display_names.query_index ; create view query_log as select * diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index ce953f59cd5..bd72547ec1c 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -10,7 +10,7 @@ mkdir left ||: left_pr=$1 left_sha=$2 -right_pr=$3 +# right_pr=$3 not used for now right_sha=$4 datasets=${CHPC_DATASETS:-"hits1 hits10 hits100 values"} diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 23686091e45..337f13690b6 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -18,9 +18,22 @@ import xml.etree.ElementTree as et from threading import Thread from scipy import stats + +total_start_seconds = time.perf_counter() +stage_start_seconds = total_start_seconds + +def reportStageEnd(stage): + global stage_start_seconds, total_start_seconds + + current = time.perf_counter() + print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}') + stage_start_seconds = current + + def tsv_escape(s): return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','') + parser = argparse.ArgumentParser(description='Run performance test.') # Explicitly decode files as UTF-8 because sometimes we have 
Russian characters in queries, and LANG=C is set. parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file') @@ -29,16 +42,21 @@ parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated l parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.') parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.') parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.') +parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.') parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.') parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.') args = parser.parse_args() +reportStageEnd('start') + test_name = os.path.splitext(os.path.basename(args.file[0].name))[0] tree = et.parse(args.file[0]) root = tree.getroot() +reportStageEnd('parse') + # Process query parameters subst_elems = root.findall('substitutions/substitution') available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... } @@ -78,11 +96,16 @@ for e in root.findall('query'): assert(len(test_queries) == len(is_short)) +# If we're given a list of queries to run, check that it makes sense. +for i in args.queries_to_run or []: + if i < 0 or i >= len(test_queries): + print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present') + exit(1) -# If we're only asked to print the queries, do that and exit +# If we're only asked to print the queries, do that and exit. if args.print_queries: - for q in test_queries: - print(q) + for i in args.queries_to_run or range(0, len(test_queries)): + print(test_queries[i]) exit(0) # Print short queries @@ -107,15 +130,21 @@ if not args.long: sys.exit(0) # Print report threshold for the test if it is set. +ignored_relative_change = 0.05 if 'max_ignored_relative_change' in root.attrib: - print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}') + ignored_relative_change = float(root.attrib["max_ignored_relative_change"]) + print(f'report-threshold\t{ignored_relative_change}') + +reportStageEnd('before-connect') # Open connections -servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)] +servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)] all_connections = [clickhouse_driver.Client(**server) for server in servers] -for s in servers: - print('server\t{}\t{}'.format(s['host'], s['port'])) +for i, s in enumerate(servers): + print(f'server\t{i}\t{s["host"]}\t{s["port"]}') + +reportStageEnd('connect') # Run drop queries, ignoring errors. Do this before all other activity, because # clickhouse_driver disconnects on error (this is not configurable), and the new @@ -130,6 +159,8 @@ for conn_index, c in enumerate(all_connections): except: pass +reportStageEnd('drop-1') + # Apply settings. # If there are errors, report them and continue -- maybe a new test uses a setting # that is not in master, but the queries can still run. 
If we have multiple @@ -147,6 +178,8 @@ for conn_index, c in enumerate(all_connections): except: print(traceback.format_exc(), file=sys.stderr) +reportStageEnd('settings') + # Check tables that should exist. If they don't exist, just skip this test. tables = [e.text for e in root.findall('preconditions/table_exists')] for t in tables: @@ -159,6 +192,8 @@ for t in tables: print(f'skipped\t{tsv_escape(skipped_message)}') sys.exit(0) +reportStageEnd('preconditions') + # Run create and fill queries. We will run them simultaneously for both servers, # to save time. # The weird search is to keep the relative order of elements, which matters, and @@ -189,6 +224,9 @@ for t in threads: for t in threads: t.join() +reportStageEnd('create') + +# By default, test all queries. queries_to_run = range(0, len(test_queries)) if args.max_queries: @@ -196,15 +234,11 @@ if args.max_queries: queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries)) if args.queries_to_run: - # Run the specified queries, with some sanity check. - for i in args.queries_to_run: - if i < 0 or i >= len(test_queries): - print(f'There is no query no. "{i}" in this test, only [{0}-{len(test_queries) - 1}] are present') - exit(1) - + # Run the specified queries. queries_to_run = args.queries_to_run # Run test queries. +profile_total_seconds = 0 for query_index in queries_to_run: q = test_queries[query_index] query_prefix = f'{test_name}.query{query_index}' @@ -324,34 +358,49 @@ for query_index in queries_to_run: client_seconds = time.perf_counter() - start_seconds print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}') - #print(all_server_times) - #print(stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue) - # Run additional profiling queries to collect profile data, but only if test times appeared to be different. # We have to do it after normal runs because otherwise it will affect test statistics too much - if len(all_server_times) == 2 and stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue < 0.1: - run = 0 - while True: - run_id = f'{query_prefix}.profile{run}' + if len(all_server_times) != 2: + continue - for conn_index, c in enumerate(this_query_connections): - try: - res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000}) - print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}') - except Exception as e: - # Add query id to the exception to make debugging easier. - e.args = (run_id, *e.args) - e.message = run_id + ': ' + e.message - raise + if len(all_server_times[0]) < 3: + # Don't fail if for some reason there are not enough measurements. + continue - elapsed = c.last_query.elapsed - profile_seconds += elapsed + pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue + median = [statistics.median(t) for t in all_server_times] + # Keep this consistent with the value used in report. Should eventually move + # to (median[1] - median[0]) / min(median), which is compatible with "times" + # difference we use in report (max(median) / min(median)). 
+ relative_diff = (median[1] - median[0]) / median[0] + print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}') + if abs(relative_diff) < ignored_relative_change or pvalue > 0.05: + continue - run += 1 - # Don't spend too much time for profile runs - if run > args.runs or profile_seconds > 10: - break - # And don't bother with short queries + # Perform profile runs for fixed amount of time. Don't limit the number + # of runs, because we also have short queries. + profile_start_seconds = time.perf_counter() + run = 0 + while time.perf_counter() - profile_start_seconds < args.profile_seconds: + run_id = f'{query_prefix}.profile{run}' + + for conn_index, c in enumerate(this_query_connections): + try: + res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000}) + print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}') + except Exception as e: + # Add query id to the exception to make debugging easier. + e.args = (run_id, *e.args) + e.message = run_id + ': ' + e.message + raise + + run += 1 + + profile_total_seconds += time.perf_counter() - profile_start_seconds + +print(f'profile-total\t{profile_total_seconds}') + +reportStageEnd('run') # Run drop queries drop_queries = substitute_parameters(drop_query_templates) @@ -359,3 +408,5 @@ for conn_index, c in enumerate(all_connections): for q in drop_queries: c.execute(q) print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') + +reportStageEnd('drop-2') diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 5e4e0a161e1..69015c2ce1a 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -487,7 +487,7 @@ if args.report == 'main': for r in rows: anchor = f'{currentTableAnchor()}.{r[0]}' total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers - if float(r[5]) > allowed_average_run_time * total_runs: + if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs: # FIXME should be 15s max -- investigate parallel_insert slow_average_tests += 1 attrs[5] = f'style="background: {color_bad}"' @@ -495,7 +495,7 @@ if args.report == 'main': else: attrs[5] = '' - if float(r[4]) > allowed_single_run_time * total_runs: + if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs: slow_average_tests += 1 attrs[4] = f'style="background: {color_bad}"' errors_explained.append([f'Some query of the test \'{r[0]}\' is too slow to run. See the all queries report']) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 87cc4054ee6..98c9427d556 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -26,11 +26,12 @@ function start() fi timeout 120 service clickhouse-server start sleep 0.5 - counter=$(($counter + 1)) + counter=$((counter + 1)) done } start +# shellcheck disable=SC2086 # No quotes because I want to split it into words. 
/s3downloader --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse clickhouse-client --query "SHOW DATABASES" @@ -43,8 +44,8 @@ clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" clickhouse-client --query "SHOW TABLES FROM test" -if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then +if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test ; then SKIP_LIST_OPT="--use-skip-list" fi -clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateful_with_coverage/run.sh b/docker/test/stateful_with_coverage/run.sh index 7191745ec83..b20e21efaf1 100755 --- a/docker/test/stateful_with_coverage/run.sh +++ b/docker/test/stateful_with_coverage/run.sh @@ -1,15 +1,15 @@ #!/bin/bash kill_clickhouse () { - kill `pgrep -u clickhouse` 2>/dev/null + kill "$(pgrep -u clickhouse)" 2>/dev/null - for i in {1..10} + for _ in {1..10} do - if ! kill -0 `pgrep -u clickhouse`; then + if ! kill -0 "$(pgrep -u clickhouse)"; then echo "No clickhouse process" break else - echo "Process" `pgrep -u clickhouse` "still alive" + echo "Process $(pgrep -u clickhouse) still alive" sleep 10 fi done @@ -20,19 +20,19 @@ start_clickhouse () { } wait_llvm_profdata () { - while kill -0 `pgrep llvm-profdata-10`; + while kill -0 "$(pgrep llvm-profdata-10)" do - echo "Waiting for profdata" `pgrep llvm-profdata-10` "still alive" + echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive" sleep 3 done } merge_client_files_in_background () { - client_files=`ls /client_*profraw 2>/dev/null` - if [ ! -z "$client_files" ] + client_files=$(ls /client_*profraw 2>/dev/null) + if [ -n "$client_files" ] then - llvm-profdata-10 merge -sparse $client_files -o merged_client_`date +%s`.profraw - rm $client_files + llvm-profdata-10 merge -sparse "$client_files" -o "merged_client_$(date +%s).profraw" + rm "$client_files" fi } @@ -66,12 +66,13 @@ function start() fi timeout 120 service clickhouse-server start sleep 0.5 - counter=$(($counter + 1)) + counter=$((counter + 1)) done } start +# shellcheck disable=SC2086 # No quotes because I want to split it into words. if ! 
/s3downloader --dataset-names $DATASETS; then echo "Cannot download datatsets" exit 1 @@ -100,11 +101,11 @@ LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TA LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" -if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then +if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" fi -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt kill_clickhouse diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 0a1ca398c85..145c5fc6528 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -13,8 +13,8 @@ dpkg -i package_folder/clickhouse-test_*.deb service clickhouse-server start && sleep 5 -if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then +if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" fi -clickhouse-test -j 6 --testname --shard --zookeeper "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +clickhouse-test -j 4 --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateless_unbundled/run.sh b/docker/test/stateless_unbundled/run.sh index 9f2bb9bf62d..8b8612d4211 100755 --- a/docker/test/stateless_unbundled/run.sh +++ b/docker/test/stateless_unbundled/run.sh @@ -13,8 +13,8 @@ dpkg -i package_folder/clickhouse-test_*.deb service clickhouse-server start && sleep 5 -if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then +if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" fi -clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh index 2f3f05a335a..338915c9d3a 100755 --- a/docker/test/stateless_with_coverage/run.sh +++ b/docker/test/stateless_with_coverage/run.sh @@ -1,24 +1,24 @@ #!/bin/bash kill_clickhouse () { - echo "clickhouse pids" `ps aux | grep clickhouse` | ts '%Y-%m-%d %H:%M:%S' - kill `pgrep -u clickhouse` 2>/dev/null + echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' + kill "$(pgrep -u clickhouse)" 2>/dev/null - for i in {1..10} + for _ in {1..10} do - if ! kill -0 `pgrep -u clickhouse`; then + if ! 
kill -0 "$(pgrep -u clickhouse)"; then echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S' break else - echo "Process" `pgrep -u clickhouse` "still alive" | ts '%Y-%m-%d %H:%M:%S' + echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S' sleep 10 fi done echo "Will try to send second kill signal for sure" - kill `pgrep -u clickhouse` 2>/dev/null + kill "$(pgrep -u clickhouse)" 2>/dev/null sleep 5 - echo "clickhouse pids" `ps aux | grep clickhouse` | ts '%Y-%m-%d %H:%M:%S' + echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' } start_clickhouse () { @@ -47,11 +47,11 @@ start_clickhouse sleep 10 -if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then +if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" fi -LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt kill_clickhouse diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 28c66a72d39..ba529c401ea 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -13,7 +13,7 @@ function stop() timeout 120 service clickhouse-server stop # Wait for process to disappear from processlist and also try to kill zombies. - while kill -9 $(pidof clickhouse-server) + while kill -9 "$(pidof clickhouse-server)" do echo "Killed clickhouse-server" sleep 0.5 @@ -35,17 +35,21 @@ function start() fi timeout 120 service clickhouse-server start sleep 0.5 - counter=$(($counter + 1)) + counter=$((counter + 1)) done } # install test configs /usr/share/clickhouse-test/config/install.sh +# for clickhouse-server (via service) echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment +# for clickhouse-client +export ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000' start +# shellcheck disable=SC2086 # No quotes because I want to split it into words. /s3downloader --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary" diff --git a/docker/test/test_runner.sh b/docker/test/test_runner.sh index d9bca8f0037..cd6367b2964 100755 --- a/docker/test/test_runner.sh +++ b/docker/test/test_runner.sh @@ -2,17 +2,19 @@ set -e -x +# Not sure why shellcheck complains that rc is not assigned before it is referenced. 
+# shellcheck disable=SC2154 trap 'rc=$?; echo EXITED WITH: $rc; exit $rc' EXIT # CLI option to prevent rebuilding images, just re-run tests with images leftover from previuos time readonly NO_REBUILD_FLAG="--no-rebuild" -readonly CLICKHOUSE_DOCKER_DIR="$(realpath ${1})" +readonly CLICKHOUSE_DOCKER_DIR="$(realpath "${1}")" readonly CLICKHOUSE_PACKAGES_ARG="${2}" CLICKHOUSE_SERVER_IMAGE="${3}" -if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then - readonly CLICKHOUSE_PACKAGES_DIR="$(realpath ${2})" # or --no-rebuild +if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then + readonly CLICKHOUSE_PACKAGES_DIR="$(realpath "${2}")" # or --no-rebuild fi @@ -25,7 +27,7 @@ fi # TODO: optionally mount most recent clickhouse-test and queries directory from local machine -if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then +if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then docker build --network=host \ -f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \ --target clickhouse-test-runner-base \ @@ -49,7 +51,7 @@ fi if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then CLICKHOUSE_SERVER_IMAGE="yandex/clickhouse-server:local" - if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then + if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then docker build --network=host \ -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ --target clickhouse-server-base \ diff --git a/docker/test/testflows/runner/dockerd-entrypoint.sh b/docker/test/testflows/runner/dockerd-entrypoint.sh index b10deaded08..1bac94a9df2 100755 --- a/docker/test/testflows/runner/dockerd-entrypoint.sh +++ b/docker/test/testflows/runner/dockerd-entrypoint.sh @@ -7,7 +7,7 @@ set +e reties=0 while true; do docker info &>/dev/null && break - reties=$[$reties+1] + reties=$((reties+1)) if [[ $reties -ge 100 ]]; then # 10 sec max echo "Can't start docker daemon, timeout exceeded." >&2 exit 1; diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index fe9aa2ca25e..d0a4bc928a7 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -165,6 +165,22 @@ Similar to GraphiteMergeTree, the Kafka engine supports extended configuration u For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `true`. +### Kerberos support {#kafka-kerberos-support} + +To deal with Kerberos-aware Kafka, add `security_protocol` child element with `sasl_plaintext` value. It is enough if Kerberos ticket-granting ticket is obtained and cached by OS facilities. +ClickHouse is able to maintain Kerberos credentials using a keytab file. Consider `sasl_kerberos_service_name`, `sasl_kerberos_keytab`, `sasl_kerberos_principal` and `sasl.kerberos.kinit.cmd` child elements. + +Example: + +``` xml + + + SASL_PLAINTEXT + /home/kafkauser/kafkauser.keytab + kafkauser/kafkahost@EXAMPLE.COM + +``` + ## Virtual Columns {#virtual-columns} - `_topic` — Kafka topic. 
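The Kerberos support note above covers only the server-side `<kafka>` configuration; consuming from a kerberized broker still goes through an ordinary Kafka engine table. Below is a minimal sketch using `clickhouse_driver` (the same client library used elsewhere in this changeset) of how such a table might be declared. The broker address, topic, and consumer group are placeholder assumptions rather than values from this patch, and the Kerberos handshake itself is driven entirely by the server configuration shown above, not by the DDL.

``` python
# Illustrative only: declare a Kafka engine table that reads from a kerberized
# broker. Broker, topic and group names are placeholders; the sasl_* Kerberos
# settings come from the server-side <kafka> configuration above.
from clickhouse_driver import Client

client = Client(host='localhost')

client.execute("""
    CREATE TABLE IF NOT EXISTS default.kerberized_events
    (
        ts DateTime,
        message String
    )
    ENGINE = Kafka
    SETTINGS
        kafka_broker_list = 'kerberized_kafka1:19092',
        kafka_topic_list = 'events',
        kafka_group_name = 'clickhouse_consumer',
        kafka_format = 'JSONEachRow'
""")
```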
diff --git a/docs/en/introduction/info.md b/docs/en/introduction/info.md new file mode 100644 index 00000000000..a397c40950d --- /dev/null +++ b/docs/en/introduction/info.md @@ -0,0 +1,10 @@ +--- +toc_priority: 100 +--- + +# Information support {#information-support} + +- Email address: +- Phone: +7-495-780-6510 + +[Original article](https://clickhouse.tech/docs/en/introduction/info/) \ No newline at end of file diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 72927b5a7e9..8b663475fa8 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -33,6 +33,7 @@ Columns: - `'ExceptionWhileProcessing' = 4` — Exception during the query execution. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Query starting date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time. +- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time with microseconds precision. - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. - `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision. - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds. @@ -84,54 +85,57 @@ Columns: **Example** ``` sql -SELECT * FROM system.query_log LIMIT 1 FORMAT Vertical; +SELECT * FROM system.query_log LIMIT 1 \G ``` ``` text Row 1: ────── -type: QueryStart -event_date: 2020-05-13 -event_time: 2020-05-13 14:02:28 -query_start_time: 2020-05-13 14:02:28 -query_duration_ms: 0 -read_rows: 0 -read_bytes: 0 -written_rows: 0 -written_bytes: 0 -result_rows: 0 -result_bytes: 0 -memory_usage: 0 -query: SELECT 1 -exception_code: 0 -exception: -stack_trace: -is_initial_query: 1 -user: default -query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -address: ::ffff:127.0.0.1 -port: 57720 -initial_user: default -initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -initial_address: ::ffff:127.0.0.1 -initial_port: 57720 -interface: 1 -os_user: bayonet -client_hostname: clickhouse.ru-central1.internal -client_name: ClickHouse client -client_revision: 54434 -client_version_major: 20 -client_version_minor: 4 -client_version_patch: 1 -http_method: 0 -http_user_agent: -quota_key: -revision: 54434 -thread_ids: [] -ProfileEvents.Names: [] -ProfileEvents.Values: [] -Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage'] -Settings.Values: ['0','random','1','10000000000'] +type: QueryStart +event_date: 2020-09-11 +event_time: 2020-09-11 10:08:17 +event_time_microseconds: 2020-09-11 10:08:17.063321 +query_start_time: 2020-09-11 10:08:17 +query_start_time_microseconds: 2020-09-11 10:08:17.063321 +query_duration_ms: 0 +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +memory_usage: 0 +current_database: default +query: INSERT INTO test1 VALUES +exception_code: 0 +exception: +stack_trace: +is_initial_query: 1 +user: default +query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +address: ::ffff:127.0.0.1 +port: 33452 +initial_user: default +initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +initial_address: ::ffff:127.0.0.1 +initial_port: 33452 +interface: 1 +os_user: bharatnc +client_hostname: tower +client_name: ClickHouse +client_revision: 
54437 +client_version_major: 20 +client_version_minor: 7 +client_version_patch: 2 +http_method: 0 +http_user_agent: +quota_key: +revision: 54440 +thread_ids: [] +ProfileEvents.Names: [] +ProfileEvents.Values: [] +Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage','allow_introspection_functions'] +Settings.Values: ['0','random','1','10000000000','1'] ``` **See Also** diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index 3dcd05c4cc3..84408edd117 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -15,6 +15,7 @@ Columns: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. +- `event_time_microsecinds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision. - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. - `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision. - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution. @@ -63,50 +64,51 @@ Columns: **Example** ``` sql - SELECT * FROM system.query_thread_log LIMIT 1 FORMAT Vertical + SELECT * FROM system.query_thread_log LIMIT 1 \G ``` ``` text Row 1: ────── -event_date: 2020-05-13 -event_time: 2020-05-13 14:02:28 -query_start_time: 2020-05-13 14:02:28 -query_duration_ms: 0 -read_rows: 1 -read_bytes: 1 -written_rows: 0 -written_bytes: 0 -memory_usage: 0 -peak_memory_usage: 0 -thread_name: QueryPipelineEx -thread_id: 28952 -master_thread_id: 28924 -query: SELECT 1 -is_initial_query: 1 -user: default -query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -address: ::ffff:127.0.0.1 -port: 57720 -initial_user: default -initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -initial_address: ::ffff:127.0.0.1 -initial_port: 57720 -interface: 1 -os_user: bayonet -client_hostname: clickhouse.ru-central1.internal -client_name: ClickHouse client -client_revision: 54434 -client_version_major: 20 -client_version_minor: 4 -client_version_patch: 1 -http_method: 0 -http_user_agent: -quota_key: -revision: 54434 -ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds'] -ProfileEvents.Values: [1,97,81,5,81] -... 
+event_date: 2020-09-11 +event_time: 2020-09-11 10:08:17 +event_time_microseconds: 2020-09-11 10:08:17.134042 +query_start_time: 2020-09-11 10:08:17 +query_start_time_microseconds: 2020-09-11 10:08:17.063150 +query_duration_ms: 70 +read_rows: 0 +read_bytes: 0 +written_rows: 1 +written_bytes: 12 +memory_usage: 4300844 +peak_memory_usage: 4300844 +thread_name: TCPHandler +thread_id: 638133 +master_thread_id: 638133 +query: INSERT INTO test1 VALUES +is_initial_query: 1 +user: default +query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +address: ::ffff:127.0.0.1 +port: 33452 +initial_user: default +initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +initial_address: ::ffff:127.0.0.1 +initial_port: 33452 +interface: 1 +os_user: bharatnc +client_hostname: tower +client_name: ClickHouse +client_revision: 54437 +client_version_major: 20 +client_version_minor: 7 +client_version_patch: 2 +http_method: 0 +http_user_agent: +quota_key: +revision: 54440 +ProfileEvents.Names: ['Query','InsertQuery','FileOpen','WriteBufferFromFileDescriptorWrite','WriteBufferFromFileDescriptorWriteBytes','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','FunctionExecute','CreatedWriteBufferOrdinary','DiskWriteElapsedMicroseconds','NetworkReceiveElapsedMicroseconds','NetworkSendElapsedMicroseconds','InsertedRows','InsertedBytes','SelectedRows','SelectedBytes','MergeTreeDataWriterRows','MergeTreeDataWriterUncompressedBytes','MergeTreeDataWriterCompressedBytes','MergeTreeDataWriterBlocks','MergeTreeDataWriterBlocksAlreadySorted','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSReadChars','OSWriteChars'] +ProfileEvents.Values: [1,1,11,11,591,148,3,71,29,6533808,1,11,72,18,47,1,12,1,12,1,12,189,1,1,10,2,70853,2748,49,2747,45056,422,1520] ``` **See Also** diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index bd92519b96b..3c3281ff8c6 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -6,6 +6,7 @@ Columns: - `event_date` (Date) — Date of the entry. - `event_time` (DateTime) — Time of the entry. +- `event_time_microseconds` (DateTime) — Time of the entry with microseconds precision. - `microseconds` (UInt32) — Microseconds of the entry. - `thread_name` (String) — Name of the thread from which the logging was done. - `thread_id` (UInt64) — OS thread ID. @@ -25,4 +26,28 @@ Columns: - `source_file` (LowCardinality(String)) — Source file from which the logging was done. - `source_line` (UInt64) — Source line from which the logging was done. 
+**Example** + +``` sql +SELECT * FROM system.text_log LIMIT 1 \G +``` + +``` text +Row 1: +────── +event_date: 2020-09-10 +event_time: 2020-09-10 11:23:07 +event_time_microseconds: 2020-09-10 11:23:07.871397 +microseconds: 871397 +thread_name: clickhouse-serv +thread_id: 564917 +level: Information +query_id: +logger_name: DNSCacheUpdater +message: Update period 15 seconds +revision: 54440 +source_file: /ClickHouse/src/Interpreters/DNSCacheUpdater.cpp; void DB::DNSCacheUpdater::start() +source_line: 45 +``` + [Original article](https://clickhouse.tech/docs/en/operations/system_tables/text_log) \ No newline at end of file diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index b911fdd2263..0fb967822d2 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -12,6 +12,8 @@ Columns: - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment. +- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment with microseconds precision. + - `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds. - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. @@ -38,13 +40,16 @@ SELECT * FROM system.trace_log LIMIT 1 \G ``` text Row 1: ────── -event_date: 2019-11-15 -event_time: 2019-11-15 15:09:38 -revision: 54428 -timer_type: Real -thread_number: 48 -query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915 -trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935] +event_date: 2020-09-10 +event_time: 2020-09-10 11:23:09 +event_time_microseconds: 2020-09-10 11:23:09.872924 +timestamp_ns: 1599762189872924510 +revision: 54440 +trace_type: Memory +thread_id: 564963 +query_id: +trace: [371912858,371912789,371798468,371799717,371801313,371790250,624462773,566365041,566440261,566445834,566460071,566459914,566459842,566459580,566459469,566459389,566459341,566455774,371993941,371988245,372158848,372187428,372187309,372187093,372185478,140222123165193,140222122205443] +size: 5244400 ``` [Original article](https://clickhouse.tech/docs/en/operations/system_tables/trace_log) \ No newline at end of file diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index c5986cd9ebc..910937b3fa9 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -357,7 +357,7 @@ SELECT date_trunc('hour', now()) ## now {#now} -Accepts zero arguments and returns the current time at one of the moments of request execution. +Accepts zero or one arguments(timezone) and returns the current time at one of the moments of request execution, or current time of specific timezone at one of the moments of request execution if `timezone` argument provided. This function returns a constant, even if the request took a long time to complete. 
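A quick way to see the optional timezone argument of `now` in action is to query it alongside the plain call. The sketch below does so through `clickhouse_driver` and assumes a local server recent enough to accept the argument; the host and the `Europe/Moscow` timezone are illustrative choices only.

``` python
# Minimal check of now() with and without the optional timezone argument.
# Host and timezone are assumptions for illustration; any timezone known
# to the server can be used.
from clickhouse_driver import Client

client = Client(host='localhost')

rows = client.execute("SELECT now(), now('Europe/Moscow'), toTimeZone(now(), 'UTC')")
for server_now, moscow_now, utc_now in rows:
    print('server default:', server_now)
    print('Europe/Moscow: ', moscow_now)
    print('UTC:           ', utc_now)
```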
## today {#today} diff --git a/docs/es/index.md b/docs/es/index.md index 9a2918ff7c8..45c41a2c7f9 100644 --- a/docs/es/index.md +++ b/docs/es/index.md @@ -1,15 +1,15 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd +machine_translated: false +machine_translated_rev: toc_priority: 0 -toc_title: "Descripci\xF3n" +toc_title: "Descripción" --- # ¿Qué es ClickHouse? {#what-is-clickhouse} -ClickHouse es un sistema de gestión de bases de datos orientado a columnas (DBMS) para el procesamiento analítico en línea de consultas (OLAP). +ClickHouse es un sistema de gestión de bases de datos (DBMS), orientado a columnas, para el procesamiento analítico de consultas en línea (OLAP). -En un “normal” DBMS orientado a filas, los datos se almacenan en este orden: +En un DBMS “normal”, orientado a filas, los datos se almacenan en este orden: | Fila | Argumento | JavaEnable | Titular | GoodEvent | EventTime | |------|-------------|------------|---------------------------|-----------|---------------------| @@ -36,7 +36,7 @@ Estos ejemplos solo muestran el orden en el que se organizan los datos. Los valo Ejemplos de un DBMS orientado a columnas: Vertica, Paraccel (Actian Matrix y Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise y Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid y kdb+. -Different orders for storing data are better suited to different scenarios. The data access scenario refers to what queries are made, how often, and in what proportion; how much data is read for each type of query – rows, columns, and bytes; the relationship between reading and updating data; the working size of the data and how locally it is used; whether transactions are used, and how isolated they are; requirements for data replication and logical integrity; requirements for latency and throughput for each type of query, and so on. +Los diferentes modos de ordenar los datos al guardarlos se adecúan mejor a diferentes escenarios. El escenario de acceso a los datos se refiere a qué consultas se hacen, con qué frecuencia y en qué proporción; cuántos datos se leen para cada tipo de consulta - filas, columnas y bytes; la relación entre lectura y actualización de datos; el tamaño de trabajo de los datos y qué tan localmente son usados; si se usan transacciones y qué tan aisladas están;requerimientos de replicación de los datos y de integridad lógica, requerimientos de latencia y caudal (throughput) para cada tipo de consulta, y cosas por el estilo. Cuanto mayor sea la carga en el sistema, más importante es personalizar el sistema configurado para que coincida con los requisitos del escenario de uso, y más fino será esta personalización. No existe un sistema que sea igualmente adecuado para escenarios significativamente diferentes. Si un sistema es adaptable a un amplio conjunto de escenarios, bajo una carga alta, el sistema manejará todos los escenarios igualmente mal, o funcionará bien para solo uno o algunos de los escenarios posibles. 
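The introduction above explains why the same data serves analytical queries very differently depending on whether it is laid out by rows or by columns. As a rough, self-contained illustration of that point (synthetic data, indicative timings only), the sketch below sums a single field stored both ways: row-wise the whole record must be touched, column-wise only one contiguous array is read.

``` python
# Toy comparison of row-oriented vs column-oriented layouts for a simple
# analytical aggregation. Synthetic data; timings are only indicative.
import time

N = 1_000_000

# Row-oriented: every record keeps all of its fields together.
rows = [(i, i % 2, f'title-{i % 100}', i % 3, (i * 7) % 1000) for i in range(N)]

# Column-oriented: one array per field; a query touches only what it needs.
java_enable = [r[1] for r in rows]

start = time.perf_counter()
row_total = sum(r[1] for r in rows)        # walks every whole record
row_seconds = time.perf_counter() - start

start = time.perf_counter()
col_total = sum(java_enable)               # reads a single contiguous array
col_seconds = time.perf_counter() - start

assert row_total == col_total
print(f'row-wise sum:    {row_seconds:.3f} s')
print(f'column-wise sum: {col_seconds:.3f} s')
```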
diff --git a/docs/ru/introduction/info.md b/docs/ru/introduction/info.md index 14e517eebae..a9398b8c9cd 100644 --- a/docs/ru/introduction/info.md +++ b/docs/ru/introduction/info.md @@ -7,6 +7,6 @@ toc_priority: 100 Информационная поддержка ClickHouse осуществляется на всей территории Российской Федерации без ограничений посредством использования телефонной связи и средств электронной почты на русском языке в круглосуточном режиме: - Адрес электронной почты: -- Телефон: 8-800-250-96-39 (звонки бесплатны из всех регионов России) +- Телефон: +7-495-780-6510 [Оригинальная статья](https://clickhouse.tech/docs/ru/introduction/info/) diff --git a/docs/ru/sql-reference/statements/select/into-outfile.md b/docs/ru/sql-reference/statements/select/into-outfile.md index 8f0126068d1..0f5cf01e9d1 100644 --- a/docs/ru/sql-reference/statements/select/into-outfile.md +++ b/docs/ru/sql-reference/statements/select/into-outfile.md @@ -1,6 +1,6 @@ # Секция INTO OUTFILE {#into-outfile-clause} -Чтобы перенаправить вывод `SELECT` запроса в указанный файл на стороне клиента, добавьте к нему секцию `INTO OUTFILE filename` (где filenam — строковый литерал). +Чтобы перенаправить вывод `SELECT` запроса в указанный файл на стороне клиента, добавьте к нему секцию `INTO OUTFILE filename` (где filename — строковый литерал). ## Детали реализации {#implementation-details} diff --git a/docs/tools/build.py b/docs/tools/build.py index c91cc8d5f3c..bcbf3ac27cd 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -185,7 +185,7 @@ def build(args): test.test_templates(args.website_dir) if not args.skip_docs: - generate_cmake_flags_files(os.path.join(os.path.dirname(__file__), '..', '..')) + generate_cmake_flags_files() build_docs(args) from github import build_releases diff --git a/docs/tools/cmake_in_clickhouse_generator.py b/docs/tools/cmake_in_clickhouse_generator.py index b15df76151e..1414ffc4b9e 100644 --- a/docs/tools/cmake_in_clickhouse_generator.py +++ b/docs/tools/cmake_in_clickhouse_generator.py @@ -55,7 +55,7 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No anchor=make_anchor(name), name=name, path=path, - line=line if line > 0 else 1) + line=line) formatted_description: str = "".join(description.split("\n")) @@ -66,8 +66,8 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No entities[name] = path, formatted_entity -def process_file(root_path: str, input_name: str) -> None: - with open(os.path.join(root_path, input_name), 'r') as cmake_file: +def process_file(root_path: str, file_path: str, file_name: str) -> None: + with open(os.path.join(file_path, file_name), 'r') as cmake_file: contents: str = cmake_file.read() def get_line_and_comment(target: str) -> Tuple[int, str]: @@ -75,7 +75,7 @@ def process_file(root_path: str, input_name: str) -> None: comment: str = "" for n, line in enumerate(contents_list): - if line.find(target) == -1: + if 'option' not in line.lower() or target not in line: continue for maybe_comment_line in contents_list[n - 1::-1]: @@ -84,27 +84,35 @@ def process_file(root_path: str, input_name: str) -> None: comment = re.sub("\s*#\s*", "", maybe_comment_line) + " " + comment - return n, comment + # line numbering starts with 1 + return n + 1, comment matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE) + + file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name) + if file_rel_path_with_name.startswith('/'): + file_rel_path_with_name = file_rel_path_with_name[1:] + if 
matches: for entity in matches: - build_entity(os.path.join(root_path[6:], input_name), entity, get_line_and_comment(entity[0])) + build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0])) -def process_folder(root_path:str, name: str) -> None: +def process_folder(root_path: str, name: str) -> None: for root, _, files in os.walk(os.path.join(root_path, name)): for f in files: if f == "CMakeLists.txt" or ".cmake" in f: - process_file(root, f) + process_file(root_path, root, f) + +def generate_cmake_flags_files() -> None: + root_path: str = os.path.join(os.path.dirname(__file__), '..', '..') -def generate_cmake_flags_files(root_path: str) -> None: output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md") header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md") footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md") - process_file(root_path, "CMakeLists.txt") - process_file(root_path, "programs/CMakeLists.txt") + process_file(root_path, root_path, "CMakeLists.txt") + process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt") process_folder(root_path, "base") process_folder(root_path, "cmake") @@ -132,14 +140,14 @@ def generate_cmake_flags_files(root_path: str) -> None: f.write(entities[k][1] + "\n") ignored_keys.append(k) - f.write("\n### External libraries system/bundled mode\n" + table_header) + f.write("\n\n### External libraries system/bundled mode\n" + table_header) for k in sorted_keys: if k.startswith("USE_INTERNAL_"): f.write(entities[k][1] + "\n") ignored_keys.append(k) - f.write("\n### Other flags\n" + table_header) + f.write("\n\n### Other flags\n" + table_header) for k in sorted(set(sorted_keys).difference(set(ignored_keys))): f.write(entities[k][1] + "\n") @@ -149,4 +157,4 @@ def generate_cmake_flags_files(root_path: str) -> None: if __name__ == '__main__': - generate_cmake_flags_files("../../") + generate_cmake_flags_files() diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index e31e43b99cd..68a72a6a785 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -18,7 +18,7 @@ Markdown==3.2.1 MarkupSafe==1.1.1 mkdocs==1.1.2 mkdocs-htmlproofer-plugin==0.0.3 -mkdocs-macros-plugin==0.4.13 +mkdocs-macros-plugin==0.4.17 nltk==3.5 nose==1.3.7 protobuf==3.13.0 diff --git a/docs/tools/translate/translate.py b/docs/tools/translate/translate.py index 6486a8cbcc7..343ab09f12a 100755 --- a/docs/tools/translate/translate.py +++ b/docs/tools/translate/translate.py @@ -49,13 +49,14 @@ def translate_impl(text, target_language=None): def translate(text, target_language=None): - result = [] - for part in re.split(curly_braces_re, text): - if part.startswith('{') and part.endswith('}'): - result.append(part) - else: - result.append(translate_impl(part, target_language=target_language)) - return ''.join(result) + return "".join( + [ + part + if part.startswith("{") and part.endswith("}") + else translate_impl(part, target_language=target_language) + for part in re.split(curly_braces_re, text) + ] + ) def translate_toc(root, lang): diff --git a/docs/zh/guides/apply-catboost-model.md b/docs/zh/guides/apply-catboost-model.md index 3657a947ad2..8368fde0d26 100644 --- a/docs/zh/guides/apply-catboost-model.md +++ b/docs/zh/guides/apply-catboost-model.md @@ -7,9 +7,11 @@ toc_title: "\u5E94\u7528CatBoost\u6A21\u578B" # 在ClickHouse中应用Catboost模型 {#applying-catboost-model-in-clickhouse} 
-[CatBoost](https://catboost.ai) 是一个自由和开源的梯度提升库开发 [Yandex](https://yandex.com/company/) 用于机器学习。 +[CatBoost](https://catboost.ai) 是一个用于机器学习的免费开源梯度提升开发库 [Yandex](https://yandex.com/company/) 。 + + +通过这篇指导,您将学会如何将预先从SQL推理出的运行模型作为训练好的模型应用到ClickHouse中去。 -通过此指令,您将学习如何通过从SQL运行模型推理在ClickHouse中应用预先训练好的模型。 在ClickHouse中应用CatBoost模型: @@ -18,18 +20,18 @@ toc_title: "\u5E94\u7528CatBoost\u6A21\u578B" 3. [将CatBoost集成到ClickHouse中](#integrate-catboost-into-clickhouse) (可选步骤)。 4. [从SQL运行模型推理](#run-model-inference). -有关训练CatBoost模型的详细信息,请参阅 [培训和应用模型](https://catboost.ai/docs/features/training.html#training). +有关训练CatBoost模型的详细信息,请参阅 [训练和使用模型](https://catboost.ai/docs/features/training.html#training). ## 先决条件 {#prerequisites} -如果你没有 [Docker](https://docs.docker.com/install/) 然而,安装它。 +请先安装好 [Docker](https://docs.docker.com/install/)。 !!! note "注" [Docker](https://www.docker.com) 是一个软件平台,允许您创建容器,将CatBoost和ClickHouse安装与系统的其余部分隔离。 在应用CatBoost模型之前: -**1.** 拉 [码头窗口映像](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) 从注册表: +**1.** 从容器仓库拉取docker映像 (https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) : ``` bash $ docker pull yandex/tutorial-catboost-clickhouse @@ -126,15 +128,15 @@ FROM amazon_train CatBoost集成到ClickHouse步骤: -**1.** 构建评估库。 +**1.** 构建测试库文件。 -评估CatBoost模型的最快方法是编译 `libcatboostmodel.` 图书馆. 有关如何构建库的详细信息,请参阅 [CatBoost文件](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html). +测试CatBoost模型的最快方法是编译 `libcatboostmodel.` 库文件. 有关如何构建库文件的详细信息,请参阅 [CatBoost文件](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html). -**2.** 例如,在任何地方和任何名称创建一个新目录, `data` 并将创建的库放入其中。 Docker映像已经包含了库 `data/libcatboostmodel.so`. +**2.** 任意创建一个新目录, 如 `data` 并将创建的库文件放入其中。 Docker映像已经包含了库 `data/libcatboostmodel.so`. -**3.** 例如,在任何地方和任何名称为config model创建一个新目录, `models`. +**3.** 任意创建一个新目录来放配置模型, 如 `models`. -**4.** 创建具有任意名称的模型配置文件,例如, `models/amazon_model.xml`. +**4.** 任意创建一个模型配置文件,如 `models/amazon_model.xml`. **5.** 描述模型配置: @@ -153,7 +155,7 @@ CatBoost集成到ClickHouse步骤: ``` -**6.** 将CatBoost的路径和模型配置添加到ClickHouse配置: +**6.** 将CatBoost库文件的路径和模型配置添加到ClickHouse配置: ``` xml @@ -161,11 +163,11 @@ CatBoost集成到ClickHouse步骤: /home/catboost/models/*_model.xml ``` -## 4. 从SQL运行模型推理 {#run-model-inference} +## 4. 运行从SQL推理的模型 {#run-model-inference} -对于测试模型,运行ClickHouse客户端 `$ clickhouse client`. +测试模型是否正常,运行ClickHouse客户端 `$ clickhouse client`. -让我们确保模型正常工作: +让我们确保模型能正常工作: ``` sql :) SELECT @@ -182,10 +184,10 @@ CatBoost集成到ClickHouse步骤: ACTION AS target FROM amazon_train LIMIT 10 -``` +``` !!! note "注" - 功能 [模型值](../sql-reference/functions/other-functions.md#function-modelevaluate) 返回带有多类模型的每类原始预测的元组。 + 函数 [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) 返回带有多类模型的每类原始预测的元组。 让我们预测一下: @@ -208,7 +210,7 @@ LIMIT 10 ``` !!! note "注" - 更多信息 [exp()](../sql-reference/functions/math-functions.md) 功能。 + 查看函数说明 [exp()](../sql-reference/functions/math-functions.md) 。 让我们计算样本的LogLoss: @@ -234,6 +236,6 @@ FROM ``` !!! 
note "注" - 更多信息 [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) 和 [日志()](../sql-reference/functions/math-functions.md) 功能。 + 查看函数说明 [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) 和 [log()](../sql-reference/functions/math-functions.md) 。 [原始文章](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp index ac18a131c3a..e85e7a21261 100644 --- a/programs/client/Suggest.cpp +++ b/programs/client/Suggest.cpp @@ -80,7 +80,7 @@ Suggest::Suggest() "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", - "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "FOR", "RANDOMIZED", + "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE"}; } diff --git a/programs/format/CMakeLists.txt b/programs/format/CMakeLists.txt index ab06708cd3a..49f17ef163f 100644 --- a/programs/format/CMakeLists.txt +++ b/programs/format/CMakeLists.txt @@ -5,6 +5,9 @@ set (CLICKHOUSE_FORMAT_LINK boost::program_options clickhouse_common_io clickhouse_parsers + clickhouse_functions + clickhouse_aggregate_functions + clickhouse_table_functions dbms ) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index daf2d671568..01f952bf95e 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -1,13 +1,29 @@ #include +#include +#include #include #include #include +#include #include #include #include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wmissing-declarations" @@ -22,6 +38,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ("oneline", "format in single line") ("quiet,q", "just check syntax, no output on success") ("multiquery,n", "allow multiple queries in the same file") + ("obfuscate", "obfuscate instead of formatting") + ("seed", po::value(), "seed (arbitrary string) that determines the result of obfuscation") ; boost::program_options::variables_map options; @@ -40,10 +58,17 @@ int mainEntryClickHouseFormat(int argc, char ** argv) bool oneline = options.count("oneline"); bool quiet = options.count("quiet"); bool multiple = options.count("multiquery"); + bool obfuscate = options.count("obfuscate"); - if (quiet && (hilite || oneline)) + if (quiet && (hilite || oneline || obfuscate)) { - std::cerr << "Options 'hilite' or 'oneline' have no sense in 'quiet' mode." << std::endl; + std::cerr << "Options 'hilite' or 'oneline' or 'obfuscate' have no sense in 'quiet' mode." << std::endl; + return 2; + } + + if (obfuscate && (hilite || oneline || quiet)) + { + std::cerr << "Options 'hilite' or 'oneline' or 'quiet' have no sense in 'obfuscate' mode." 
<< std::endl; return 2; } @@ -51,21 +76,66 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ReadBufferFromFileDescriptor in(STDIN_FILENO); readStringUntilEOF(query, in); - const char * pos = query.data(); - const char * end = pos + query.size(); - - ParserQuery parser(end); - do + if (obfuscate) { - ASTPtr res = parseQueryAndMovePosition(parser, pos, end, "query", multiple, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - if (!quiet) + WordMap obfuscated_words_map; + WordSet used_nouns; + SipHash hash_func; + + if (options.count("seed")) { - formatAST(*res, std::cout, hilite, oneline); - if (multiple) - std::cout << "\n;\n"; - std::cout << std::endl; + std::string seed; + hash_func.update(options["seed"].as()); } - } while (multiple && pos != end); + + SharedContextHolder shared_context = Context::createShared(); + Context context = Context::createGlobal(shared_context.get()); + context.makeGlobalContext(); + + registerFunctions(); + registerAggregateFunctions(); + registerTableFunctions(); + registerStorages(); + + std::unordered_set additional_names; + + auto all_known_storage_names = StorageFactory::instance().getAllRegisteredNames(); + auto all_known_data_type_names = DataTypeFactory::instance().getAllRegisteredNames(); + + additional_names.insert(all_known_storage_names.begin(), all_known_storage_names.end()); + additional_names.insert(all_known_data_type_names.begin(), all_known_data_type_names.end()); + + KnownIdentifierFunc is_known_identifier = [&](std::string_view name) + { + std::string what(name); + + return FunctionFactory::instance().tryGet(what, context) != nullptr + || AggregateFunctionFactory::instance().isAggregateFunctionName(what) + || TableFunctionFactory::instance().isTableFunctionName(what) + || additional_names.count(what); + }; + + WriteBufferFromFileDescriptor out(STDOUT_FILENO); + obfuscateQueries(query, out, obfuscated_words_map, used_nouns, hash_func, is_known_identifier); + } + else + { + const char * pos = query.data(); + const char * end = pos + query.size(); + + ParserQuery parser(end); + do + { + ASTPtr res = parseQueryAndMovePosition(parser, pos, end, "query", multiple, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + if (!quiet) + { + formatAST(*res, std::cout, hilite, oneline); + if (multiple) + std::cout << "\n;\n"; + std::cout << std::endl; + } + } while (multiple && pos != end); + } } catch (...) { diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index bd60fbb63ba..ae0c22c8fcc 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -552,7 +552,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) #if defined(__linux__) fmt::print("Setting capabilities for clickhouse binary. 
This is optional.\n"); - std::string command = fmt::format("command setcap && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {}", main_bin_path.string()); + std::string command = fmt::format("command -v setcap && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {}", main_bin_path.string()); fmt::print(" {}\n", command); executeScript(command); #endif diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 5cdf5766a44..41da477152c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -210,7 +210,7 @@ try /// Maybe useless if (config().has("macros")) - context->setMacros(std::make_unique(config(), "macros")); + context->setMacros(std::make_unique(config(), "macros", log)); /// Skip networking diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b703d0ac6a7..0019ba7e76c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -534,7 +534,7 @@ int Server::main(const std::vector & /*args*/) } if (config().has("macros")) - global_context->setMacros(std::make_unique(config(), "macros")); + global_context->setMacros(std::make_unique(config(), "macros", log)); /// Initialize main config reloader. std::string include_from_path = config().getString("include_from", "/etc/metrika.xml"); @@ -559,7 +559,7 @@ int Server::main(const std::vector & /*args*/) //setTextLog(global_context->getTextLog()); //buildLoggers(*config, logger()); global_context->setClustersConfig(config); - global_context->setMacros(std::make_unique(*config, "macros")); + global_context->setMacros(std::make_unique(*config, "macros", log)); global_context->setExternalAuthenticatorsConfig(*config); /// Setup protection to avoid accidental DROP for big tables (that are greater than 50 GB by default) @@ -671,6 +671,10 @@ int Server::main(const std::vector & /*args*/) total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); + /// Set current database name before loading tables and databases because + /// system logs may copy global context. + global_context->setCurrentDatabaseNameInGlobalContext(default_database); + LOG_INFO(log, "Loading metadata from {}", path); try @@ -678,11 +682,14 @@ int Server::main(const std::vector & /*args*/) loadMetadataSystem(*global_context); /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); + auto & database_catalog = DatabaseCatalog::instance(); /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) - attachSystemTablesServer(*DatabaseCatalog::instance().getSystemDatabase(), has_zookeeper); + attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper); /// Then, load remaining databases loadMetadata(*global_context, default_database); - DatabaseCatalog::instance().loadDatabases(); + database_catalog.loadDatabases(); + /// After loading validate that default database exists + database_catalog.assertDatabaseExists(default_database); } catch (...) 
{ @@ -745,8 +752,6 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created" " (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe)."); - global_context->setCurrentDatabase(default_database); - if (has_zookeeper && config().has("distributed_ddl")) { /// DDL worker should be started after all tables were loaded diff --git a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h index 9ceb7976f4a..7c254668f8d 100644 --- a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h +++ b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h @@ -115,7 +115,7 @@ public: : IAggregateFunctionDataHelper(arguments, {}) { const auto x_arg = arguments.at(0).get(); - const auto y_arg = arguments.at(0).get(); + const auto y_arg = arguments.at(1).get(); if (!x_arg->isValueRepresentedByNumber() || !y_arg->isValueRepresentedByNumber()) throw Exception("Illegal types of arguments of aggregate function " + getName() + ", must have number representation.", diff --git a/src/Common/BitHelpers.h b/src/Common/BitHelpers.h index 699e379b8d3..e79daeba14e 100644 --- a/src/Common/BitHelpers.h +++ b/src/Common/BitHelpers.h @@ -1,22 +1,12 @@ #pragma once #include +#include #include #include #include -/** Returns log2 of number, rounded down. - * Compiles to single 'bsr' instruction on x86. - * For zero argument, result is unspecified. - */ -inline unsigned int bitScanReverse(unsigned int x) -{ - assert(x != 0); - return sizeof(unsigned int) * 8 - 1 - __builtin_clz(x); -} - - /** For zero argument, result is zero. * For arguments with most significand bit set, result is n. * For other arguments, returns value, rounded up to power of two. @@ -41,10 +31,9 @@ inline size_t roundUpToPowerOfTwoOrZero(size_t n) template -inline size_t getLeadingZeroBits(T x) +inline size_t getLeadingZeroBitsUnsafe(T x) { - if (!x) - return sizeof(x) * 8; + assert(x != 0); if constexpr (sizeof(T) <= sizeof(unsigned int)) { @@ -60,10 +49,32 @@ inline size_t getLeadingZeroBits(T x) } } + +template +inline size_t getLeadingZeroBits(T x) +{ + if (!x) + return sizeof(x) * 8; + + return getLeadingZeroBitsUnsafe(x); +} + +/** Returns log2 of number, rounded down. + * Compiles to single 'bsr' instruction on x86. + * For zero argument, result is unspecified. 
+ */ +template +inline uint32_t bitScanReverse(T x) +{ + return (std::max(sizeof(T), sizeof(unsigned int))) * 8 - 1 - getLeadingZeroBitsUnsafe(x); +} + // Unsafe since __builtin_ctz()-family explicitly state that result is undefined on x == 0 template inline size_t getTrailingZeroBitsUnsafe(T x) { + assert(x != 0); + if constexpr (sizeof(T) <= sizeof(unsigned int)) { return __builtin_ctz(x); @@ -88,8 +99,8 @@ inline size_t getTrailingZeroBits(T x) } /** Returns a mask that has '1' for `bits` LSB set: - * maskLowBits(3) => 00000111 - */ + * maskLowBits(3) => 00000111 + */ template inline T maskLowBits(unsigned char bits) { diff --git a/src/Common/Macros.cpp b/src/Common/Macros.cpp index e3735c44359..b8e25499c0b 100644 --- a/src/Common/Macros.cpp +++ b/src/Common/Macros.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -12,19 +13,32 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -Macros::Macros(const Poco::Util::AbstractConfiguration & config, const String & root_key) +Macros::Macros(const Poco::Util::AbstractConfiguration & config, const String & root_key, Poco::Logger * log) { Poco::Util::AbstractConfiguration::Keys keys; config.keys(root_key, keys); for (const String & key : keys) { macros[key] = config.getString(root_key + "." + key); + if (key == "database" || key == "table" || key == "uuid") + { + if (log) + LOG_WARNING(log, + "Config file contains '{}' macro. This macro has special meaning " + "and it's explicit definition is not recommended. Implicit unfolding for " + "'database', 'table' and 'uuid' macros will be disabled.", + key); + enable_special_macros = false; + } } } String Macros::expand(const String & s, MacroExpansionInfo & info) const { + /// Do not allow recursion if we expand only special macros, because it will be infinite recursion + assert(info.level == 0 || !info.expand_special_macros_only); + if (s.find('{') == String::npos) return s; @@ -34,6 +48,10 @@ String Macros::expand(const String & s, if (info.level >= 10) throw Exception("Too deep recursion while expanding macros: '" + s + "'", ErrorCodes::SYNTAX_ERROR); + /// If config file contains explicit special macro, then we do not expand it in this mode. + if (!enable_special_macros && info.expand_special_macros_only) + return s; + String res; size_t pos = 0; while (true) @@ -59,15 +77,21 @@ String Macros::expand(const String & s, auto it = macros.find(macro_name); /// Prefer explicit macros over implicit. - if (it != macros.end()) + if (it != macros.end() && !info.expand_special_macros_only) res += it->second; - else if (macro_name == "database" && !info.database_name.empty()) - res += info.database_name; - else if (macro_name == "table" && !info.table_name.empty()) - res += info.table_name; + else if (macro_name == "database" && !info.table_id.database_name.empty()) + { + res += info.table_id.database_name; + info.expanded_database = true; + } + else if (macro_name == "table" && !info.table_id.table_name.empty()) + { + res += info.table_id.table_name; + info.expanded_table = true; + } else if (macro_name == "uuid") { - if (info.uuid == UUIDHelpers::Nil) + if (info.table_id.uuid == UUIDHelpers::Nil) throw Exception("Macro 'uuid' and empty arguments of ReplicatedMergeTree " "are supported only for ON CLUSTER queries with Atomic database engine", ErrorCodes::SYNTAX_ERROR); @@ -76,12 +100,16 @@ String Macros::expand(const String & s, /// It becomes impossible to check if {uuid} is contained inside some unknown macro. 
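Aside on the BitHelpers rework above (the getLeadingZeroBitsUnsafe/bitScanReverse pair): a minimal standalone sketch, assuming a GCC/Clang-style __builtin_clz, of the identity the patched bitScanReverse relies on. This is only an illustration, not the header itself.

#include <cassert>
#include <cstdint>

// floor(log2(x)) for a non-zero 32-bit value: count the leading zeros and
// subtract them from (bit width - 1). The patched bitScanReverse computes the
// same thing via getLeadingZeroBitsUnsafe for any integral type.
static inline uint32_t floorLog2(uint32_t x)
{
    assert(x != 0);
    return 31 - static_cast<uint32_t>(__builtin_clz(x));
}

int main()
{
    assert(floorLog2(1) == 0);
    assert(floorLog2(2) == 1);
    assert(floorLog2(3) == 1);
    assert(floorLog2(1024) == 10);
    assert(floorLog2(UINT32_MAX) == 31);

    // maskLowBits-style mask: 'bits' least significant bits set, e.g. 3 -> 0b00000111.
    assert(((uint32_t(1) << 3) - 1) == 0x07);
    return 0;
}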
if (info.level) throw Exception("Macro 'uuid' should not be inside another macro", ErrorCodes::SYNTAX_ERROR); - res += toString(info.uuid); + res += toString(info.table_id.uuid); info.expanded_uuid = true; } - else if (info.ignore_unknown) + else if (info.ignore_unknown || info.expand_special_macros_only) { + if (info.expand_special_macros_only) + res += '{'; res += macro_name; + if (info.expand_special_macros_only) + res += '}'; info.has_unknown = true; } else @@ -93,6 +121,9 @@ String Macros::expand(const String & s, } ++info.level; + if (info.expand_special_macros_only) + return res; + return expand(res, info); } @@ -113,9 +144,9 @@ String Macros::expand(const String & s) const String Macros::expand(const String & s, const StorageID & table_id, bool allow_uuid) const { MacroExpansionInfo info; - info.database_name = table_id.database_name; - info.table_name = table_id.table_name; - info.uuid = allow_uuid ? table_id.uuid : UUIDHelpers::Nil; + info.table_id = table_id; + if (!allow_uuid) + info.table_id.uuid = UUIDHelpers::Nil; return expand(s, info); } diff --git a/src/Common/Macros.h b/src/Common/Macros.h index 6e4f25d55ef..3082452e297 100644 --- a/src/Common/Macros.h +++ b/src/Common/Macros.h @@ -13,6 +13,7 @@ namespace Poco { class AbstractConfiguration; } + class Logger; } @@ -25,18 +26,19 @@ class Macros { public: Macros() = default; - Macros(const Poco::Util::AbstractConfiguration & config, const String & key); + Macros(const Poco::Util::AbstractConfiguration & config, const String & key, Poco::Logger * log = nullptr); struct MacroExpansionInfo { /// Settings - String database_name; - String table_name; - UUID uuid = UUIDHelpers::Nil; + StorageID table_id = StorageID::createEmpty(); bool ignore_unknown = false; + bool expand_special_macros_only = false; /// Information about macro expansion size_t level = 0; + bool expanded_database = false; + bool expanded_table = false; bool expanded_uuid = false; bool has_unknown = false; }; @@ -64,6 +66,7 @@ public: private: MacroMap macros; + bool enable_special_macros = true; }; diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index bbb8801f190..992419adb6d 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -35,12 +35,14 @@ namespace ErrorCodes extern const int CANNOT_CREATE_CHILD_PROCESS; } -ShellCommand::ShellCommand(pid_t pid_, int in_fd_, int out_fd_, int err_fd_, bool terminate_in_destructor_) +ShellCommand::ShellCommand(pid_t pid_, int & in_fd_, int & out_fd_, int & err_fd_, bool terminate_in_destructor_) : pid(pid_) , terminate_in_destructor(terminate_in_destructor_) , in(in_fd_) , out(out_fd_) - , err(err_fd_) {} + , err(err_fd_) +{ +} Poco::Logger * ShellCommand::getLogger() { @@ -144,12 +146,6 @@ std::unique_ptr ShellCommand::executeImpl( pid, pipe_stdin.fds_rw[1], pipe_stdout.fds_rw[0], pipe_stderr.fds_rw[0], terminate_in_destructor)); LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid); - - /// Now the ownership of the file descriptors is passed to the result. 
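To make the special-macro handling in Macros::expand above easier to follow, here is a rough, self-contained sketch of the idea behind expand_special_macros_only: substitute only the special placeholders and re-emit every other {name} with its braces intact so a later pass can still expand it. The helper name expandSpecialOnly is hypothetical; this is not the real Macros implementation.

#include <iostream>
#include <map>
#include <string>

// "Special only" pass: {database}/{table}/{uuid} are substituted, every other
// {name} is kept verbatim (braces included) for the next expansion pass.
static std::string expandSpecialOnly(const std::string & s, const std::map<std::string, std::string> & special)
{
    std::string res;
    size_t pos = 0;
    while (pos < s.size())
    {
        size_t begin = s.find('{', pos);
        if (begin == std::string::npos) { res += s.substr(pos); break; }
        size_t end = s.find('}', begin);
        if (end == std::string::npos) { res += s.substr(pos); break; }

        res += s.substr(pos, begin - pos);
        std::string name = s.substr(begin + 1, end - begin - 1);
        auto it = special.find(name);
        if (it != special.end())
            res += it->second;          // expand the special macro
        else
            res += "{" + name + "}";    // keep unknown macro for the later, full pass
        pos = end + 1;
    }
    return res;
}

int main()
{
    std::map<std::string, std::string> special{{"database", "db1"}, {"table", "t1"}};
    // {shard} stays intact for the later expansion pass.
    std::cout << expandSpecialOnly("/clickhouse/{database}/{table}/{shard}", special) << '\n';
    // prints: /clickhouse/db1/t1/{shard}
}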
- pipe_stdin.fds_rw[1] = -1; - pipe_stdout.fds_rw[0] = -1; - pipe_stderr.fds_rw[0] = -1; - return res; } diff --git a/src/Common/ShellCommand.h b/src/Common/ShellCommand.h index e609a3fc85b..1c681227efd 100644 --- a/src/Common/ShellCommand.h +++ b/src/Common/ShellCommand.h @@ -30,7 +30,7 @@ private: bool wait_called = false; bool terminate_in_destructor; - ShellCommand(pid_t pid_, int in_fd_, int out_fd_, int err_fd_, bool terminate_in_destructor_); + ShellCommand(pid_t pid_, int & in_fd_, int & out_fd_, int & err_fd_, bool terminate_in_destructor_); static Poco::Logger * getLogger(); diff --git a/src/Common/Stopwatch.h b/src/Common/Stopwatch.h index 772caa75373..a7f5e76d5be 100644 --- a/src/Common/Stopwatch.h +++ b/src/Common/Stopwatch.h @@ -13,7 +13,6 @@ inline UInt64 clock_gettime_ns(clockid_t clock_type = CLOCK_MONOTONIC) return UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec); } - /** Differs from Poco::Stopwatch only by using 'clock_gettime' instead of 'gettimeofday', * returns nanoseconds instead of microseconds, and also by other minor differencies. */ diff --git a/src/Common/StringUtils/StringUtils.h b/src/Common/StringUtils/StringUtils.h index a1e8fb79435..904e3035dd8 100644 --- a/src/Common/StringUtils/StringUtils.h +++ b/src/Common/StringUtils/StringUtils.h @@ -67,10 +67,19 @@ inline bool isASCII(char c) return static_cast(c) < 0x80; } +inline bool isLowerAlphaASCII(char c) +{ + return (c >= 'a' && c <= 'z'); +} + +inline bool isUpperAlphaASCII(char c) +{ + return (c >= 'A' && c <= 'Z'); +} + inline bool isAlphaASCII(char c) { - return (c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z'); + return isLowerAlphaASCII(c) || isUpperAlphaASCII(c); } inline bool isNumericASCII(char c) @@ -122,6 +131,16 @@ inline bool isPrintableASCII(char c) return uc >= 32 && uc <= 126; /// 127 is ASCII DEL. } +inline bool isPunctuationASCII(char c) +{ + uint8_t uc = c; + return (uc >= 33 && uc <= 47) + || (uc >= 58 && uc <= 64) + || (uc >= 91 && uc <= 96) + || (uc >= 123 && uc <= 125); +} + + inline bool isValidIdentifier(const std::string_view & str) { return !str.empty() && isValidIdentifierBegin(str[0]) && std::all_of(str.begin() + 1, str.end(), isWordCharASCII); diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index dda16c7725d..21116e9d432 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -234,10 +234,16 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ std::is_same_v ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive); job(); + /// job should be reset before decrementing scheduled_jobs to + /// ensure that the Job is destroyed before wait() returns. job = {}; } catch (...) { + /// job should be reset before decrementing scheduled_jobs to + /// ensure that the Job is destroyed before wait() returns. + job = {}; + { std::unique_lock lock(mutex); if (!first_exception) diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 55796905b73..eb385547fa8 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -11,6 +11,7 @@ #include #include +#include /** Very simple thread pool similar to boost::threadpool.
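The ThreadPool.cpp comments above (reset the job before decrementing scheduled_jobs) encode a lifetime guarantee: by the time wait() returns, everything the job captured has been destroyed. A minimal model of that reset-before-signal pattern, assuming a single worker and one job, is sketched below; it is not the real ThreadPoolImpl.

#include <cassert>
#include <condition_variable>
#include <functional>
#include <memory>
#include <mutex>
#include <thread>

int main()
{
    std::mutex mutex;
    std::condition_variable cv;
    size_t scheduled_jobs = 1;

    auto resource = std::make_shared<int>(42);
    std::weak_ptr<int> observer = resource;

    // The job captures the resource by value; after reset() below it holds the last reference.
    std::function<void()> job = [resource] { (void)*resource; };
    resource.reset();

    std::thread worker([&]
    {
        job();
        job = {};  // destroy the captured state *before* signalling completion
        {
            std::lock_guard<std::mutex> lock(mutex);
            --scheduled_jobs;
        }
        cv.notify_all();
    });

    {
        std::unique_lock<std::mutex> lock(mutex);
        cv.wait(lock, [&] { return scheduled_jobs == 0; });
    }
    // Because the worker reset the job first, the captured resource is already gone here,
    // which is the guarantee the wait() caller relies on.
    assert(observer.expired());
    worker.join();
    return 0;
}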
@@ -161,21 +162,19 @@ public: GlobalThreadPool::instance().scheduleOrThrow([ state = state, func = std::forward(func), - args = std::make_tuple(std::forward(args)...)] + args = std::make_tuple(std::forward(args)...)]() mutable /// mutable is needed to destroy capture { - try - { - /// Thread status holds raw pointer on query context, thus it always must be destroyed - /// before sending signal that permits to join this thread. - DB::ThreadStatus thread_status; - std::apply(func, args); - } - catch (...) - { - state->set(); - throw; - } - state->set(); + SCOPE_EXIT(state->set()); + + /// These moves are needed to destroy function and arguments before exit. + /// It will guarantee that after ThreadFromGlobalPool::join all captured params are destroyed. + auto function = std::move(func); + auto arguments = std::move(args); + + /// Thread status holds raw pointer on query context, thus it always must be destroyed + /// before sending signal that permits to join this thread. + DB::ThreadStatus thread_status; + std::apply(function, arguments); }); } diff --git a/src/Common/TraceCollector.cpp b/src/Common/TraceCollector.cpp index c69c9d1dc1a..104b747d431 100644 --- a/src/Common/TraceCollector.cpp +++ b/src/Common/TraceCollector.cpp @@ -141,8 +141,14 @@ void TraceCollector::run() if (trace_log) { - UInt64 time = clock_gettime_ns(CLOCK_REALTIME); - TraceLogElement element{time_t(time / 1000000000), time, trace_type, thread_id, query_id, trace, size}; + // time and time_in_microseconds are both being constructed from the same timespec so that the + // times will be equal up to the precision of a second. + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + + UInt64 time = UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec); + UInt64 time_in_microseconds = UInt64((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000)); + TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size}; trace_log->add(element); } } diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 41d715b23f1..bee875d1c74 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -663,7 +663,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition & { WaitForDisappearStatePtr state = std::make_shared(); - auto callback = [state](const Coordination::ExistsResponse & response) + auto callback = [state](const Coordination::GetResponse & response) { state->code = int32_t(response.error); if (state->code) @@ -683,8 +683,9 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition & while (!condition || !condition()) { - /// NOTE: if the node doesn't exist, the watch will leak. - impl->exists(path, callback, watch); + /// Use getData instead of exists to avoid watch leak.
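Regarding the TraceCollector change above: deriving both timestamps from a single timespec keeps the nanosecond and microsecond values consistent at second precision. A tiny sketch of the arithmetic, assuming a POSIX clock_gettime:

#include <cstdint>
#include <ctime>
#include <iostream>

int main()
{
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);

    // Nanoseconds and microseconds derived from the same reading.
    const uint64_t time_ns = uint64_t(ts.tv_sec) * 1000000000ULL + uint64_t(ts.tv_nsec);
    const uint64_t time_us = uint64_t(ts.tv_sec) * 1000000ULL + uint64_t(ts.tv_nsec) / 1000;

    // Both truncate to the same number of whole seconds, so the two columns stay in sync.
    std::cout << (time_ns / 1000000000ULL == time_us / 1000000ULL) << '\n';  // prints 1
    return 0;
}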
+ impl->get(path, callback, watch); + if (!condition) state->event.wait(); else if (!state->event.tryWait(1000)) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 2ee0c5aba17..abb8158781b 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -422,6 +422,18 @@ void ZooKeeperRequest::write(WriteBuffer & out) const } +static void removeRootPath(String & path, const String & root_path) +{ + if (root_path.empty()) + return; + + if (path.size() <= root_path.size()) + throw Exception("Received path is not longer than root_path", Error::ZDATAINCONSISTENCY); + + path = path.substr(root_path.size()); +} + + struct ZooKeeperResponse : virtual Response { virtual ~ZooKeeperResponse() override = default; @@ -1092,8 +1104,6 @@ void ZooKeeper::sendThread() { info.request->has_watch = true; CurrentMetrics::add(CurrentMetrics::ZooKeeperWatch); - std::lock_guard lock(watches_mutex); - watches[info.request->getPath()].emplace_back(std::move(info.watch)); } if (expired) @@ -1278,6 +1288,30 @@ void ZooKeeper::receiveEvent() response->removeRootPath(root_path); } + /// Instead of setting the watch in sendEvent, set it in receiveEvent because we need to check the response. + /// The watch shouldn't be set if the node does not exist and will never exist, as with sequential ephemeral nodes. + /// By using getData() instead of exists(), a watch won't be set if the node doesn't exist. + if (request_info.watch) + { + bool add_watch = false; + /// 3 indicates the ZooKeeperExistsRequest. + // For exists, we set the watch in both the node-exists and node-does-not-exist cases. + // For other requests like getData, we only set the watch when the node exists. + if (request_info.request->getOpNum() == 3) + add_watch = (response->error == Error::ZOK || response->error == Error::ZNONODE); + else + add_watch = response->error == Error::ZOK; + + if (add_watch) + { + /// The key of watches should exclude the root_path + String req_path = request_info.request->getPath(); + removeRootPath(req_path, root_path); + std::lock_guard lock(watches_mutex); + watches[req_path].emplace_back(std::move(request_info.watch)); + } + } + int32_t actual_length = in->count() - count_before_event; if (length != actual_length) throw Exception("Response length doesn't match.
Expected: " + toString(length) + ", actual: " + toString(actual_length), Error::ZMARSHALLINGERROR); diff --git a/src/Compression/CompressionCodecDelta.cpp b/src/Compression/CompressionCodecDelta.cpp index a10d2589576..f3d6953bd2a 100644 --- a/src/Compression/CompressionCodecDelta.cpp +++ b/src/Compression/CompressionCodecDelta.cpp @@ -136,7 +136,7 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_ namespace { -UInt8 getDeltaBytesSize(DataTypePtr column_type) +UInt8 getDeltaBytesSize(const IDataType * column_type) { if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Delta is not applicable for {} because the data type is not of fixed size", @@ -155,7 +155,7 @@ UInt8 getDeltaBytesSize(DataTypePtr column_type) void registerCodecDelta(CompressionCodecFactory & factory) { UInt8 method_code = UInt8(CompressionMethodByte::Delta); - factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr + factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr { UInt8 delta_bytes_size = 0; diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 96fd29fe356..6895a80264d 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -307,7 +307,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) } } -UInt8 getDataBytesSize(DataTypePtr column_type) +UInt8 getDataBytesSize(const IDataType * column_type) { if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec DoubleDelta is not applicable for {} because the data type is not of fixed size", @@ -413,7 +413,7 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory) { UInt8 method_code = UInt8(CompressionMethodByte::DoubleDelta); factory.registerCompressionCodecWithType("DoubleDelta", method_code, - [&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr + [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr { if (arguments) throw Exception("Codec DoubleDelta does not accept any arguments", ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Compression/CompressionCodecGorilla.cpp b/src/Compression/CompressionCodecGorilla.cpp index d739623a94b..582a9f41874 100644 --- a/src/Compression/CompressionCodecGorilla.cpp +++ b/src/Compression/CompressionCodecGorilla.cpp @@ -222,7 +222,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) } } -UInt8 getDataBytesSize(DataTypePtr column_type) +UInt8 getDataBytesSize(const IDataType * column_type) { if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Gorilla is not applicable for {} because the data type is not of fixed size", @@ -329,7 +329,7 @@ void registerCodecGorilla(CompressionCodecFactory & factory) { UInt8 method_code = UInt8(CompressionMethodByte::Gorilla); factory.registerCompressionCodecWithType("Gorilla", method_code, - [&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr + [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr { if (arguments) throw Exception("Codec Gorilla does not 
accept any arguments", ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index f081652f613..f0fb8351410 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -136,7 +136,7 @@ TypeIndex baseType(TypeIndex type_idx) return TypeIndex::Nothing; } -TypeIndex typeIdx(const DataTypePtr & data_type) +TypeIndex typeIdx(const IDataType * data_type) { if (!data_type) return TypeIndex::Nothing; @@ -656,7 +656,7 @@ void CompressionCodecT64::updateHash(SipHash & hash) const void registerCodecT64(CompressionCodecFactory & factory) { - auto reg_func = [&](const ASTPtr & arguments, DataTypePtr type) -> CompressionCodecPtr + auto reg_func = [&](const ASTPtr & arguments, const IDataType * type) -> CompressionCodecPtr { Variant variant = Variant::Byte; @@ -683,7 +683,7 @@ void registerCodecT64(CompressionCodecFactory & factory) auto type_idx = typeIdx(type); if (type && type_idx == TypeIndex::Nothing) - throw Exception("T64 codec is not supported for specified type", ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE); + throw Exception("T64 codec is not supported for specified type " + type->getName(), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE); return std::make_shared(type_idx, variant); }; diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index cebeee7c5a8..91b4aa4b8de 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -57,7 +58,7 @@ void CompressionCodecFactory::validateCodec(const String & family_name, std::opt } } -ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr & ast, DataTypePtr column_type, bool sanity_check) const +ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const IDataType * column_type, bool sanity_check) const { if (const auto * func = ast->as()) { @@ -67,6 +68,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr bool has_none = false; std::optional generic_compression_codec_pos; + bool can_substitute_codec_arguments = true; for (size_t i = 0; i < func->arguments->children.size(); ++i) { const auto & inner_codec_ast = func->arguments->children[i]; @@ -99,7 +101,34 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr } else { - result_codec = getImpl(codec_family_name, codec_arguments, column_type); + if (column_type) + { + CompressionCodecPtr prev_codec; + IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & substream_type) + { + if (IDataType::isSpecialCompressionAllowed(substream_path)) + { + result_codec = getImpl(codec_family_name, codec_arguments, &substream_type); + + /// Case for column Tuple, which compressed with codec which depends on data type, like Delta. + /// We cannot substitute parameters for such codecs. + if (prev_codec && prev_codec->getHash() != result_codec->getHash()) + can_substitute_codec_arguments = false; + prev_codec = result_codec; + } + }; + + IDataType::SubstreamPath stream_path; + column_type->enumerateStreams(callback, stream_path); + + if (!result_codec) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. 
It's a bug", column_type->getName()); + } + else + { + result_codec = getImpl(codec_family_name, codec_arguments, nullptr); + } + codecs_descriptions->children.emplace_back(result_codec->getCodecDesc()); } @@ -140,16 +169,30 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr " (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", ErrorCodes::BAD_ARGUMENTS); } - std::shared_ptr result = std::make_shared(); - result->name = "CODEC"; - result->arguments = codecs_descriptions; - return result; + /// For columns with nested types like Tuple(UInt32, UInt64) we + /// obviously cannot substitute parameters for codecs which depend on + /// data type, because for the first column Delta(4) is suitable and + /// Delta(8) for the second. So we should leave codec description as is + /// and deduce them in get method for each subtype separately. For all + /// other types it's better to substitute parameters, for better + /// readability and backward compatibility. + if (can_substitute_codec_arguments) + { + std::shared_ptr result = std::make_shared(); + result->name = "CODEC"; + result->arguments = codecs_descriptions; + return result; + } + else + { + return ast; + } } throw Exception("Unknown codec family: " + queryToString(ast), ErrorCodes::UNKNOWN_CODEC); } -CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr column_type, CompressionCodecPtr current_default) const +CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default, bool only_generic) const { if (current_default == nullptr) current_default = default_codec; @@ -175,10 +218,16 @@ CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr else throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + CompressionCodecPtr codec; if (codec_family_name == DEFAULT_CODEC_NAME) - codecs.emplace_back(current_default); + codec = current_default; else - codecs.emplace_back(getImpl(codec_family_name, codec_arguments, column_type)); + codec = getImpl(codec_family_name, codec_arguments, column_type); + + if (only_generic && !codec->isGenericCompression()) + continue; + + codecs.emplace_back(codec); } CompressionCodecPtr res; @@ -187,6 +236,8 @@ CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr return codecs.back(); else if (codecs.size() > 1) return std::make_shared(codecs); + else + return std::make_shared(); } throw Exception("Unexpected AST structure for compression codec: " + queryToString(ast), ErrorCodes::UNEXPECTED_AST_STRUCTURE); @@ -203,7 +254,7 @@ CompressionCodecPtr CompressionCodecFactory::get(const uint8_t byte_code) const } -CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, DataTypePtr column_type) const +CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, const IDataType * column_type) const { if (family_name == "Multiple") throw Exception("Codec Multiple cannot be specified directly", ErrorCodes::UNKNOWN_CODEC); @@ -235,7 +286,7 @@ void CompressionCodecFactory::registerCompressionCodecWithType( void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional byte_code, Creator creator) { - registerCompressionCodecWithType(family_name, byte_code, [family_name, creator](const ASTPtr & ast, DataTypePtr /* data_type */) + 
registerCompressionCodecWithType(family_name, byte_code, [family_name, creator](const ASTPtr & ast, const IDataType * /* data_type */) { return creator(ast); }); diff --git a/src/Compression/CompressionFactory.h b/src/Compression/CompressionFactory.h index f30050f81ba..dc06e853898 100644 --- a/src/Compression/CompressionFactory.h +++ b/src/Compression/CompressionFactory.h @@ -26,7 +26,7 @@ class CompressionCodecFactory final : private boost::noncopyable { protected: using Creator = std::function; - using CreatorWithType = std::function; + using CreatorWithType = std::function; using SimpleCreator = std::function; using CompressionCodecsDictionary = std::unordered_map; using CompressionCodecsCodeDictionary = std::unordered_map; @@ -38,7 +38,13 @@ public: CompressionCodecPtr getDefaultCodec() const; /// Validate codecs AST specified by user and parses codecs description (substitute default parameters) - ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, DataTypePtr column_type, bool sanity_check) const; + ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const IDataType * column_type, bool sanity_check) const; + + /// Just wrapper for previous method. + ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check) const + { + return validateCodecAndGetPreprocessedAST(ast, column_type.get(), sanity_check); + } /// Validate codecs AST specified by user void validateCodec(const String & family_name, std::optional level, bool sanity_check) const; @@ -47,8 +53,18 @@ public: /// information about type to improve inner settings, but every codec should /// be able to work without information about type. Also AST can contain /// codec, which can be alias to current default codec, which can be changed - /// in runtime. - CompressionCodecPtr get(const ASTPtr & ast, DataTypePtr column_type, CompressionCodecPtr current_default = nullptr) const; + /// in runtime. If only_generic is true than method will filter all + /// isGenericCompression() == false codecs from result. If nothing found + /// will return codec NONE. It's useful for auxiliary parts of complex columns + /// like Nullable, Array and so on. If all codecs are non generic and + /// only_generic = true, than codec NONE will be returned. + CompressionCodecPtr get(const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default = nullptr, bool only_generic = false) const; + + /// Just wrapper for previous method. 
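As an aside on the can_substitute_codec_arguments logic in CompressionFactory above: the Delta width is derived from the fixed value size of each substream's type, which is why a single substituted parameter cannot fit Tuple(UInt32, UInt64). A simplified sketch of that conflict; deltaBytesForValueSize is a stand-in, not the real getDeltaBytesSize.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Rough model: the Delta codec width is the fixed value size when it is 1, 2, 4 or 8 bytes.
static uint8_t deltaBytesForValueSize(size_t value_size)
{
    if (value_size == 1 || value_size == 2 || value_size == 4 || value_size == 8)
        return static_cast<uint8_t>(value_size);
    return 1;
}

int main()
{
    // Tuple(UInt32, UInt64): the two element substreams want different Delta widths,
    // so the CODEC(Delta) description must stay unsubstituted and be resolved per substream.
    assert(deltaBytesForValueSize(sizeof(uint32_t)) == 4);
    assert(deltaBytesForValueSize(sizeof(uint64_t)) == 8);
    assert(deltaBytesForValueSize(sizeof(uint32_t)) != deltaBytesForValueSize(sizeof(uint64_t)));
    return 0;
}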
+ CompressionCodecPtr get(const ASTPtr & ast, const DataTypePtr & column_type, CompressionCodecPtr current_default = nullptr, bool only_generic = false) const + { + return get(ast, column_type.get(), current_default, only_generic); + } /// Get codec by method byte (no params available) CompressionCodecPtr get(const uint8_t byte_code) const; @@ -65,7 +81,7 @@ public: void registerSimpleCompressionCodec(const String & family_name, std::optional byte_code, SimpleCreator creator); protected: - CompressionCodecPtr getImpl(const String & family_name, const ASTPtr & arguments, DataTypePtr column_type) const; + CompressionCodecPtr getImpl(const String & family_name, const ASTPtr & arguments, const IDataType * column_type) const; private: CompressionCodecsDictionary family_name_with_codec; diff --git a/src/Compression/ICompressionCodec.cpp b/src/Compression/ICompressionCodec.cpp index 1b2c90e5163..baf6e9b2b86 100644 --- a/src/Compression/ICompressionCodec.cpp +++ b/src/Compression/ICompressionCodec.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index fa143af8b9c..2018218f2e7 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -17,7 +17,6 @@ using CompressionCodecPtr = std::shared_ptr; using Codecs = std::vector; class IDataType; -using DataTypePtr = std::shared_ptr; /** * Represents interface for compression codecs like LZ4, ZSTD, etc. diff --git a/src/Core/ColumnWithTypeAndName.cpp b/src/Core/ColumnWithTypeAndName.cpp index 29da8e86439..012645cb065 100644 --- a/src/Core/ColumnWithTypeAndName.cpp +++ b/src/Core/ColumnWithTypeAndName.cpp @@ -27,7 +27,7 @@ bool ColumnWithTypeAndName::operator==(const ColumnWithTypeAndName & other) cons } -void ColumnWithTypeAndName::dumpStructure(WriteBuffer & out) const +void ColumnWithTypeAndName::dumpNameAndType(WriteBuffer & out) const { out << name; @@ -35,6 +35,11 @@ void ColumnWithTypeAndName::dumpStructure(WriteBuffer & out) const out << ' ' << type->getName(); else out << " nullptr"; +} + +void ColumnWithTypeAndName::dumpStructure(WriteBuffer & out) const +{ + dumpNameAndType(out); if (column) out << ' ' << column->dumpStructure(); diff --git a/src/Core/ColumnWithTypeAndName.h b/src/Core/ColumnWithTypeAndName.h index 27b09710258..2ecc2df9b5a 100644 --- a/src/Core/ColumnWithTypeAndName.h +++ b/src/Core/ColumnWithTypeAndName.h @@ -33,6 +33,7 @@ struct ColumnWithTypeAndName ColumnWithTypeAndName cloneEmpty() const; bool operator==(const ColumnWithTypeAndName & other) const; + void dumpNameAndType(WriteBuffer & out) const; void dumpStructure(WriteBuffer & out) const; String dumpStructure() const; }; diff --git a/src/Core/MultiEnum.h b/src/Core/MultiEnum.h index ddfc5b13e86..a7e3393c9c3 100644 --- a/src/Core/MultiEnum.h +++ b/src/Core/MultiEnum.h @@ -86,7 +86,7 @@ struct MultiEnum return right.operator==(left); } - template + template >::type> friend bool operator!=(L left, MultiEnum right) { return !(right.operator==(left)); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9449cd571a1..6eb853828c5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -352,6 +352,7 @@ class IColumn; \ M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic, "Default database engine.", 0) \ M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \ + M(Bool, 
database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \ M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ @@ -368,7 +369,6 @@ class IColumn; \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, false, "Changes format of directories names for distributed table insert parts.", 0) \ - M(UInt64, multiple_joins_rewriter_version, 2, "1 or 2. Second rewriter version knows about table columns and keep not clashed names as is.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \ @@ -399,6 +399,7 @@ class IColumn; M(UInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \ M(Bool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \ M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \ + M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \ \ M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \ M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. 
Will be removed after 2020-11-29.", 0) \ @@ -462,7 +463,7 @@ class IColumn; M(String, format_custom_result_after_delimiter, "", "Suffix after result set (for CustomSeparated format)", 0) \ \ M(String, format_regexp, "", "Regular expression (for Regexp format)", 0) \ - M(String, format_regexp_escaping_rule, "Escaped", "Field escaping rule (for Regexp format)", 0) \ + M(String, format_regexp_escaping_rule, "Raw", "Field escaping rule (for Regexp format)", 0) \ M(Bool, format_regexp_skip_unmatched, false, "Skip lines unmatched by regular expression (for Regexp format", 0) \ \ M(Bool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \ diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index 49666cca428..ed570beeda6 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -151,7 +151,7 @@ namespace void DataTypeArray::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const { path.push_back(Substream::ArraySizes); - callback(path); + callback(path, *this); path.back() = Substream::ArrayElements; nested->enumerateStreams(callback, path); path.pop_back(); diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index b4f28a8853f..6b1166dc26a 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ b/src/DataTypes/DataTypeLowCardinality.cpp @@ -54,7 +54,7 @@ void DataTypeLowCardinality::enumerateStreams(const StreamCallback & callback, S path.push_back(Substream::DictionaryKeys); dictionary_type->enumerateStreams(callback, path); path.back() = Substream::DictionaryIndexes; - callback(path); + callback(path, *this); path.pop_back(); } @@ -774,7 +774,7 @@ void DataTypeLowCardinality::deserializeTextQuoted(IColumn & column, ReadBuffer void DataTypeLowCardinality::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeImpl(column, &IDataType::deserializeAsTextEscaped, istr, settings); + deserializeImpl(column, &IDataType::deserializeAsWholeText, istr, settings); } void DataTypeLowCardinality::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index 9c738da9f6a..ed501939901 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -44,7 +44,7 @@ bool DataTypeNullable::onlyNull() const void DataTypeNullable::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const { path.push_back(Substream::NullMap); - callback(path); + callback(path, *this); path.back() = Substream::NullableElements; nested_data_type->enumerateStreams(callback, path); path.pop_back(); diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 561166cbc78..d1c9f1bde77 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -130,6 +130,18 @@ String IDataType::getFileNameForStream(const String & column_name, const IDataTy } +bool IDataType::isSpecialCompressionAllowed(const SubstreamPath & path) +{ + for (const Substream & elem : path) + { + if (elem.type == Substream::NullMap + || elem.type == Substream::ArraySizes + || elem.type == Substream::DictionaryIndexes) + return false; + } + return true; +} + void IDataType::insertDefaultInto(IColumn & column) const { column.insertDefault(); diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 
6adcc0fda90..5e25d47534e 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -104,10 +104,11 @@ public: using SubstreamPath = std::vector; - using StreamCallback = std::function; + using StreamCallback = std::function; + virtual void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const { - callback(path); + callback(path, *this); } void enumerateStreams(const StreamCallback & callback, SubstreamPath && path) const { enumerateStreams(callback, path); } void enumerateStreams(const StreamCallback & callback) const { enumerateStreams(callback, {}); } @@ -442,6 +443,10 @@ public: static String getFileNameForStream(const String & column_name, const SubstreamPath & path); + /// Substream path supports special compression methods like codec Delta. + /// For all other substreams (like ArraySizes, NullMasks, etc.) we use only + /// generic compression codecs like LZ4. + static bool isSpecialCompressionAllowed(const SubstreamPath & path); private: friend class DataTypeFactory; /// Customize this DataType @@ -685,4 +690,3 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime = t template <> inline constexpr bool IsDataTypeDateOrDateTime = true; } - diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index ed17a8eccb1..46d56e97436 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -83,7 +83,7 @@ void DatabaseAtomic::attachTable(const String & name, const StoragePtr & table, assert(relative_table_path != data_path && !relative_table_path.empty()); DetachedTables not_in_use; std::unique_lock lock(mutex); - not_in_use = cleenupDetachedTables(); + not_in_use = cleanupDetachedTables(); auto table_id = table->getStorageID(); assertDetachedTableNotInUse(table_id.uuid); DatabaseWithDictionaries::attachTableUnlocked(name, table, lock); @@ -97,7 +97,7 @@ StoragePtr DatabaseAtomic::detachTable(const String & name) auto table = DatabaseWithDictionaries::detachTableUnlocked(name, lock); table_name_to_path.erase(name); detached_tables.emplace(table->getStorageID().uuid, table); - not_in_use = cleenupDetachedTables(); + not_in_use = cleanupDetachedTables(); return table; } @@ -207,11 +207,13 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot move dictionary to other database"); StoragePtr table = getTableUnlocked(table_name, db_lock); + table->checkTableCanBeRenamed(); assert_can_move_mat_view(table); StoragePtr other_table; if (exchange) { other_table = other_db.getTableUnlocked(to_table_name, other_db_lock); + other_table->checkTableCanBeRenamed(); assert_can_move_mat_view(other_table); } @@ -261,7 +263,7 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora if (query.database != database_name) throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed to `{}`, cannot create table in `{}`", database_name, query.database); - not_in_use = cleenupDetachedTables(); + not_in_use = cleanupDetachedTables(); assertDetachedTableNotInUse(query.uuid); renameNoReplace(table_metadata_tmp_path, table_metadata_path); attachTableUnlocked(query.table, table, lock); /// Should never throw @@ -304,7 +306,7 @@ void DatabaseAtomic::assertDetachedTableNotInUse(const UUID & uuid) ", because it was detached but still used by some query. 
Retry later.", ErrorCodes::TABLE_ALREADY_EXISTS); } -DatabaseAtomic::DetachedTables DatabaseAtomic::cleenupDetachedTables() +DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables() { DetachedTables not_in_use; auto it = detached_tables.begin(); @@ -322,14 +324,14 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleenupDetachedTables() return not_in_use; } -void DatabaseAtomic::assertCanBeDetached(bool cleenup) +void DatabaseAtomic::assertCanBeDetached(bool cleanup) { - if (cleenup) + if (cleanup) { DetachedTables not_in_use; { std::lock_guard lock(mutex); - not_in_use = cleenupDetachedTables(); + not_in_use = cleanupDetachedTables(); } } std::lock_guard lock(mutex); @@ -498,6 +500,28 @@ void DatabaseAtomic::renameDictionaryInMemoryUnlocked(const StorageID & old_name const auto & dict = dynamic_cast(*result.object); dict.updateDictionaryName(new_name); } +void DatabaseAtomic::waitDetachedTableNotInUse(const UUID & uuid) +{ + { + std::lock_guard lock{mutex}; + if (detached_tables.count(uuid) == 0) + return; + } + + /// Table is in use while its shared_ptr counter is greater than 1. + /// We cannot trigger condvar on shared_ptr destruction, so it's busy wait. + while (true) + { + DetachedTables not_in_use; + { + std::lock_guard lock{mutex}; + not_in_use = cleanupDetachedTables(); + if (detached_tables.count(uuid) == 0) + return; + } + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } +} } diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 02c922f8b91..2d90ed96f1d 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -51,13 +51,15 @@ public: void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach) override; /// Atomic database cannot be detached if there is detached table which still in use - void assertCanBeDetached(bool cleenup); + void assertCanBeDetached(bool cleanup); UUID tryGetTableUUID(const String & table_name) const override; void tryCreateSymlink(const String & table_name, const String & actual_data_path); void tryRemoveSymlink(const String & table_name); + void waitDetachedTableNotInUse(const UUID & uuid); + private: void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path) override; void commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table, @@ -65,7 +67,7 @@ private: void assertDetachedTableNotInUse(const UUID & uuid); typedef std::unordered_map DetachedTables; - [[nodiscard]] DetachedTables cleenupDetachedTables(); + [[nodiscard]] DetachedTables cleanupDetachedTables(); void tryCreateMetadataSymlink(); diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 1bf4db74bf0..2bb4c595c05 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -19,6 +19,7 @@ #if USE_MYSQL # include +# include # include # include # include @@ -83,7 +84,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS); if (engine_define->engine->parameters || engine_define->partition_by || engine_define->primary_key || engine_define->order_by || - engine_define->sample_by || (engine_name != "MaterializeMySQL" && engine_define->settings)) + engine_define->sample_by || (!endsWith(engine_name, "MySQL") && engine_define->settings)) throw Exception("Database engine " + engine_name + " cannot have parameters, 
primary_key, order_by, sample_by, settings", ErrorCodes::UNKNOWN_ELEMENT_IN_AST); @@ -133,8 +134,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String , std::move(materialize_mode_settings)); } + auto mysql_database_settings = std::make_unique(); + + mysql_database_settings->loadFromQueryContext(context); + mysql_database_settings->loadFromQuery(*engine_define); /// higher priority + return std::make_shared( - context, database_name, metadata_path, engine_define, mysql_database_name, std::move(mysql_pool)); + context, database_name, metadata_path, engine_define, mysql_database_name, std::move(mysql_database_settings), std::move(mysql_pool)); } catch (...) { diff --git a/src/Databases/MySQL/ConnectionMySQLSettings.cpp b/src/Databases/MySQL/ConnectionMySQLSettings.cpp new file mode 100644 index 00000000000..fa92e793225 --- /dev/null +++ b/src/Databases/MySQL/ConnectionMySQLSettings.cpp @@ -0,0 +1,65 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; + extern const int BAD_ARGUMENTS; +} + +IMPLEMENT_SETTINGS_TRAITS(ConnectionMySQLSettingsTraits, LIST_OF_CONNECTION_MYSQL_SETTINGS) + +void ConnectionMySQLSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + throw Exception(e.message() + " for database " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); + else + e.rethrow(); + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } + + SettingsChanges & changes = storage_def.settings->changes; +#define ADD_IF_ABSENT(NAME) \ + if (std::find_if(changes.begin(), changes.end(), \ + [](const SettingChange & c) { return c.name == #NAME; }) \ + == changes.end()) \ + changes.push_back(SettingChange{#NAME, static_cast(NAME)}); + + APPLY_FOR_IMMUTABLE_CONNECTION_MYSQL_SETTINGS(ADD_IF_ABSENT) +#undef ADD_IF_ABSENT +} + +void ConnectionMySQLSettings::loadFromQueryContext(const Context & context) +{ + if (!context.hasQueryContext()) + return; + + const Settings & settings = context.getQueryContext().getSettingsRef(); + + if (settings.mysql_datatypes_support_level.value != mysql_datatypes_support_level.value) + set("mysql_datatypes_support_level", settings.mysql_datatypes_support_level.toString()); +} + + +} diff --git a/src/Databases/MySQL/ConnectionMySQLSettings.h b/src/Databases/MySQL/ConnectionMySQLSettings.h new file mode 100644 index 00000000000..90279f846a4 --- /dev/null +++ b/src/Databases/MySQL/ConnectionMySQLSettings.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class Context; +class ASTStorage; + +#define LIST_OF_CONNECTION_MYSQL_SETTINGS(M) \ + M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precison are seen as String on ClickHouse's side.", 0) \ + +/// Settings that should not change after the creation of a database. 
+#define APPLY_FOR_IMMUTABLE_CONNECTION_MYSQL_SETTINGS(M) \ + M(mysql_datatypes_support_level) + +DECLARE_SETTINGS_TRAITS(ConnectionMySQLSettingsTraits, LIST_OF_CONNECTION_MYSQL_SETTINGS) + + +/** Settings for the MySQL database engine. + * Could be loaded from a CREATE DATABASE query (SETTINGS clause) and Query settings. + */ +struct ConnectionMySQLSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); + + void loadFromQueryContext(const Context & context); +}; + +} diff --git a/src/Databases/MySQL/DatabaseConnectionMySQL.cpp b/src/Databases/MySQL/DatabaseConnectionMySQL.cpp index 9c94014bf23..03d218d132f 100644 --- a/src/Databases/MySQL/DatabaseConnectionMySQL.cpp +++ b/src/Databases/MySQL/DatabaseConnectionMySQL.cpp @@ -45,13 +45,13 @@ static constexpr const std::chrono::seconds cleaner_sleep_time{30}; static const std::chrono::seconds lock_acquire_timeout{10}; DatabaseConnectionMySQL::DatabaseConnectionMySQL(const Context & context, const String & database_name_, const String & metadata_path_, - const ASTStorage * database_engine_define_, const String & database_name_in_mysql_, mysqlxx::Pool && pool) + const ASTStorage * database_engine_define_, const String & database_name_in_mysql_, std::unique_ptr settings_, mysqlxx::Pool && pool) : IDatabase(database_name_) , global_context(context.getGlobalContext()) , metadata_path(metadata_path_) , database_engine_define(database_engine_define_->clone()) , database_name_in_mysql(database_name_in_mysql_) - , mysql_datatypes_support_level(context.getQueryContext().getSettingsRef().mysql_datatypes_support_level) + , database_settings(std::move(settings_)) , mysql_pool(std::move(pool)) { empty(); /// test database is works fine. @@ -133,9 +133,20 @@ static ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr columns_expression_list->children.emplace_back(column_declaration); } + ASTStorage * ast_storage = table_storage_define->as(); + ASTs storage_children = ast_storage->children; + auto storage_engine_arguments = ast_storage->engine->arguments; + + /// Add table_name to engine arguments auto mysql_table_name = std::make_shared(table_id.table_name); - auto storage_engine_arguments = table_storage_define->as()->engine->arguments; storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 2, mysql_table_name); + + /// Unset settings + storage_children.erase( + std::remove_if(storage_children.begin(), storage_children.end(), + [&](const ASTPtr & element) { return element.get() == ast_storage->settings; }), + storage_children.end()); + ast_storage->settings = nullptr; } return create_table_query; @@ -273,7 +284,7 @@ std::map DatabaseConnectionMySQL::fetchTablesColumnsL database_name_in_mysql, tables_name, settings.external_table_functions_use_nulls, - mysql_datatypes_support_level); + database_settings->mysql_datatypes_support_level); } void DatabaseConnectionMySQL::shutdown() diff --git a/src/Databases/MySQL/DatabaseConnectionMySQL.h b/src/Databases/MySQL/DatabaseConnectionMySQL.h index e9f72adc013..7bf5e8c1d88 100644 --- a/src/Databases/MySQL/DatabaseConnectionMySQL.h +++ b/src/Databases/MySQL/DatabaseConnectionMySQL.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -36,7 +37,8 @@ public: DatabaseConnectionMySQL( const Context & context, const String & database_name, const String & metadata_path, - const ASTStorage * database_engine_define, const String & database_name_in_mysql, mysqlxx::Pool && pool); + const ASTStorage * database_engine_define, 
const String & database_name_in_mysql, std::unique_ptr settings_, + mysqlxx::Pool && pool); String getEngineName() const override { return "MySQL"; } @@ -76,9 +78,7 @@ private: String metadata_path; ASTPtr database_engine_define; String database_name_in_mysql; - // Cache setting for later from query context upon creation, - // so column types depend on the settings set at query-level. - MultiEnum mysql_datatypes_support_level; + std::unique_ptr database_settings; std::atomic quit{false}; std::condition_variable cond; diff --git a/src/Databases/ya.make b/src/Databases/ya.make index 726127bfe52..b4173057e03 100644 --- a/src/Databases/ya.make +++ b/src/Databases/ya.make @@ -17,6 +17,7 @@ SRCS( DatabaseOrdinary.cpp DatabasesCommon.cpp DatabaseWithDictionaries.cpp + MySQL/ConnectionMySQLSettings.cpp MySQL/DatabaseConnectionMySQL.cpp MySQL/DatabaseMaterializeMySQL.cpp MySQL/FetchTablesColumnsList.cpp diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 522149d3cfd..368b760044d 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -331,6 +331,7 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory); void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); void registerFileSegmentationEngineRegexp(FormatFactory & factory); void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); +void registerFileSegmentationEngineLineAsString(FormatFactory & factory); /// Formats for both input/output. @@ -367,6 +368,8 @@ void registerInputFormatProcessorArrow(FormatFactory & factory); void registerOutputFormatProcessorArrow(FormatFactory & factory); void registerInputFormatProcessorAvro(FormatFactory & factory); void registerOutputFormatProcessorAvro(FormatFactory & factory); +void registerInputFormatProcessorRawBLOB(FormatFactory & factory); +void registerOutputFormatProcessorRawBLOB(FormatFactory & factory); /// Output only (presentational) formats. 
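The FormatFactory registrations above (LineAsString segmentation, RawBLOB input/output) follow the usual registration pattern: each format contributes a registerX(*this) call that stores a creator. A toy registry, purely illustrative and not the real FormatFactory API, with deliberately simplified creator signatures:

#include <functional>
#include <iostream>
#include <map>
#include <string>

struct ToyFormatFactory
{
    using Creator = std::function<std::string(const std::string & /*input*/)>;
    std::map<std::string, Creator> creators;

    void registerFormat(const std::string & name, Creator creator) { creators[name] = std::move(creator); }
};

static void registerToyRawBLOB(ToyFormatFactory & factory)
{
    // RawBLOB-like behaviour: pass the payload through untouched as a single value.
    factory.registerFormat("RawBLOB", [](const std::string & input) { return input; });
}

static void registerToyLineAsString(ToyFormatFactory & factory)
{
    // LineAsString-like behaviour: everything up to the first newline becomes one String field.
    factory.registerFormat("LineAsString", [](const std::string & input) { return input.substr(0, input.find('\n')); });
}

int main()
{
    ToyFormatFactory factory;
    registerToyRawBLOB(factory);
    registerToyLineAsString(factory);
    std::cout << factory.creators["LineAsString"]("first line\nsecond line") << '\n';  // prints "first line"
    return 0;
}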
@@ -400,6 +403,7 @@ FormatFactory::FormatFactory() registerFileSegmentationEngineJSONEachRow(*this); registerFileSegmentationEngineRegexp(*this); registerFileSegmentationEngineJSONAsString(*this); + registerFileSegmentationEngineLineAsString(*this); registerInputFormatNative(*this); registerOutputFormatNative(*this); @@ -426,6 +430,9 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorTemplate(*this); registerInputFormatProcessorMsgPack(*this); registerOutputFormatProcessorMsgPack(*this); + registerInputFormatProcessorRawBLOB(*this); + registerOutputFormatProcessorRawBLOB(*this); + #if !defined(ARCADIA_BUILD) registerInputFormatProcessorORC(*this); registerOutputFormatProcessorORC(*this); @@ -456,6 +463,7 @@ FormatFactory::FormatFactory() registerInputFormatProcessorRegexp(*this); registerInputFormatProcessorJSONAsString(*this); registerInputFormatProcessorLineAsString(*this); + #if !defined(ARCADIA_BUILD) registerInputFormatProcessorCapnProto(*this); #endif diff --git a/src/Formats/MySQLBlockInputStream.cpp b/src/Formats/MySQLBlockInputStream.cpp index be1e254b22f..73def337240 100644 --- a/src/Formats/MySQLBlockInputStream.cpp +++ b/src/Formats/MySQLBlockInputStream.cpp @@ -23,13 +23,27 @@ namespace ErrorCodes extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; } +MySQLBlockInputStream::Connection::Connection( + const mysqlxx::PoolWithFailover::Entry & entry_, + const std::string & query_str) + : entry(entry_) + , query{entry->query(query_str)} + , result{query.use()} +{ +} MySQLBlockInputStream::MySQLBlockInputStream( - const mysqlxx::PoolWithFailover::Entry & entry_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_, const bool auto_close_) - : entry{entry_}, query{this->entry->query(query_str)}, result{query.use()}, max_block_size{max_block_size_}, auto_close{auto_close_} + const mysqlxx::PoolWithFailover::Entry & entry, + const std::string & query_str, + const Block & sample_block, + const UInt64 max_block_size_, + const bool auto_close_) + : connection{std::make_unique(entry, query_str)} + , max_block_size{max_block_size_} + , auto_close{auto_close_} { - if (sample_block.columns() != result.getNumFields()) - throw Exception{"mysqlxx::UseQueryResult contains " + toString(result.getNumFields()) + " columns while " + if (sample_block.columns() != connection->result.getNumFields()) + throw Exception{"mysqlxx::UseQueryResult contains " + toString(connection->result.getNumFields()) + " columns while " + toString(sample_block.columns()) + " expected", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; @@ -106,11 +120,11 @@ namespace Block MySQLBlockInputStream::readImpl() { - auto row = result.fetch(); + auto row = connection->result.fetch(); if (!row) { if (auto_close) - entry.disconnect(); + connection->entry.disconnect(); return {}; } @@ -145,11 +159,42 @@ Block MySQLBlockInputStream::readImpl() if (num_rows == max_block_size) break; - row = result.fetch(); + row = connection->result.fetch(); } return description.sample_block.cloneWithColumns(std::move(columns)); } +MySQLBlockInputStream::MySQLBlockInputStream( + const Block & sample_block_, + UInt64 max_block_size_, + bool auto_close_) + : max_block_size(max_block_size_) + , auto_close(auto_close_) +{ + description.init(sample_block_); +} + +MySQLLazyBlockInputStream::MySQLLazyBlockInputStream( + mysqlxx::Pool & pool_, + const std::string & query_str_, + const Block & sample_block_, + const UInt64 max_block_size_, + const bool auto_close_) + : MySQLBlockInputStream(sample_block_, max_block_size_, 
auto_close_) + , pool(pool_) + , query_str(query_str_) +{ +} + +void MySQLLazyBlockInputStream::readPrefix() +{ + connection = std::make_unique(pool.get(), query_str); + if (description.sample_block.columns() != connection->result.getNumFields()) + throw Exception{"mysqlxx::UseQueryResult contains " + toString(connection->result.getNumFields()) + " columns while " + + toString(description.sample_block.columns()) + " expected", + ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; +} + } #endif diff --git a/src/Formats/MySQLBlockInputStream.h b/src/Formats/MySQLBlockInputStream.h index 238994acbd8..2eaeb5b8d59 100644 --- a/src/Formats/MySQLBlockInputStream.h +++ b/src/Formats/MySQLBlockInputStream.h @@ -10,12 +10,13 @@ namespace DB { + /// Allows processing results of a MySQL query as a sequence of Blocks, simplifies chaining -class MySQLBlockInputStream final : public IBlockInputStream +class MySQLBlockInputStream : public IBlockInputStream { public: MySQLBlockInputStream( - const mysqlxx::PoolWithFailover::Entry & entry_, + const mysqlxx::PoolWithFailover::Entry & entry, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_, @@ -25,15 +26,43 @@ public: Block getHeader() const override { return description.sample_block.cloneEmpty(); } -private: +protected: + MySQLBlockInputStream(const Block & sample_block_, UInt64 max_block_size_, bool auto_close_); Block readImpl() override; - mysqlxx::PoolWithFailover::Entry entry; - mysqlxx::Query query; - mysqlxx::UseQueryResult result; + struct Connection + { + Connection(const mysqlxx::PoolWithFailover::Entry & entry_, const std::string & query_str); + + mysqlxx::PoolWithFailover::Entry entry; + mysqlxx::Query query; + mysqlxx::UseQueryResult result; + }; + + std::unique_ptr connection; + const UInt64 max_block_size; const bool auto_close; ExternalResultDescription description; }; +/// Like MySQLBlockInputStream, but allocates connection only when reading is starting. +/// It allows to create a lot of stream objects without occupation of all connection pool. 
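/// Editorial note: a minimal usage sketch for the lazy stream declared just below; the pool,
/// query text, block-processing call and sample_block are hypothetical placeholders.
///
///     MySQLLazyBlockInputStream stream(pool, "SELECT id, name FROM t", sample_block, DEFAULT_BLOCK_SIZE);
///     stream.readPrefix();                    /// the pool connection is acquired only here
///     while (Block block = stream.read())     /// rows are fetched over the lazily created connection
///         process(block);
///     stream.readSuffix();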
+class MySQLLazyBlockInputStream final : public MySQLBlockInputStream +{ +public: + MySQLLazyBlockInputStream( + mysqlxx::Pool & pool_, + const std::string & query_str_, + const Block & sample_block_, + const UInt64 max_block_size_, + const bool auto_close_ = false); + +private: + void readPrefix() override; + + mysqlxx::Pool & pool; + std::string query_str; +}; + } diff --git a/src/Functions/now.cpp b/src/Functions/now.cpp index e71c3a77f11..3ec3ee736b3 100644 --- a/src/Functions/now.cpp +++ b/src/Functions/now.cpp @@ -5,11 +5,19 @@ #include #include +#include + #include namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + namespace { @@ -35,7 +43,7 @@ private: class FunctionBaseNow : public IFunctionBaseImpl { public: - explicit FunctionBaseNow(time_t time_) : time_value(time_), return_type(std::make_shared()) {} + explicit FunctionBaseNow(time_t time_, DataTypePtr return_type_) : time_value(time_), return_type(return_type_) {} String getName() const override { return "now"; } @@ -72,14 +80,44 @@ public: bool isDeterministic() const override { return false; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } static FunctionOverloadResolverImplPtr create(const Context &) { return std::make_unique(); } - DataTypePtr getReturnType(const DataTypes &) const override { return std::make_shared(); } - - FunctionBaseImplPtr build(const ColumnsWithTypeAndName &, const DataTypePtr &) const override + DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override { - return std::make_unique(time(nullptr)); + if (arguments.size() > 1) + { + throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type)) + { + throw Exception( + "Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + if (arguments.size() == 1) + { + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0)); + } + return std::make_shared(); + } + + FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override + { + if (arguments.size() > 1) + { + throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type)) + { + throw Exception( + "Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + if (arguments.size() == 1) + return std::make_unique( + time(nullptr), std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0))); + return std::make_unique(time(nullptr), std::make_shared()); } }; diff --git a/src/Functions/reinterpretStringAs.cpp b/src/Functions/reinterpretStringAs.cpp index 71528b7cb61..5a2757a4284 100644 --- a/src/Functions/reinterpretStringAs.cpp +++ b/src/Functions/reinterpretStringAs.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -68,7 +69,7 @@ public: size_t offset = 0; for (size_t i = 0; i < size; ++i) { - ToFieldType value = 0; + ToFieldType value{}; memcpy(&value, &data_from[offset], std::min(static_cast(sizeof(ToFieldType)), offsets_from[i] - offset - 1)); vec_res[i] = value; offset = offsets_from[i]; @@ -90,7 +91,7 @@ public: 
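/// Editorial note: as in the hunk above, `ToFieldType value{}` replaces `ToFieldType value = 0`
/// below. Value-initialization also covers non-arithmetic field types, presumably to let the new
/// reinterpretAsUUID function registered further down reuse this template for UUID (UInt128).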
size_t copy_size = std::min(step, sizeof(ToFieldType)); for (size_t i = 0; i < size; ++i) { - ToFieldType value = 0; + ToFieldType value{}; memcpy(&value, &data_from[offset], copy_size); vec_res[i] = value; offset += step; @@ -120,6 +121,7 @@ struct NameReinterpretAsFloat32 { static constexpr auto name = "reinterpretA struct NameReinterpretAsFloat64 { static constexpr auto name = "reinterpretAsFloat64"; }; struct NameReinterpretAsDate { static constexpr auto name = "reinterpretAsDate"; }; struct NameReinterpretAsDateTime { static constexpr auto name = "reinterpretAsDateTime"; }; +struct NameReinterpretAsUUID { static constexpr auto name = "reinterpretAsUUID"; }; using FunctionReinterpretAsUInt8 = FunctionReinterpretStringAs; using FunctionReinterpretAsUInt16 = FunctionReinterpretStringAs; @@ -133,6 +135,7 @@ using FunctionReinterpretAsFloat32 = FunctionReinterpretStringAs; using FunctionReinterpretAsDate = FunctionReinterpretStringAs; using FunctionReinterpretAsDateTime = FunctionReinterpretStringAs; +using FunctionReinterpretAsUUID = FunctionReinterpretStringAs; } @@ -150,6 +153,7 @@ void registerFunctionsReinterpretStringAs(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/IO/ReadBufferFromFile.cpp b/src/IO/ReadBufferFromFile.cpp index 226615c757e..d0f94441622 100644 --- a/src/IO/ReadBufferFromFile.cpp +++ b/src/IO/ReadBufferFromFile.cpp @@ -54,7 +54,7 @@ ReadBufferFromFile::ReadBufferFromFile( ReadBufferFromFile::ReadBufferFromFile( - int fd_, + int & fd_, const std::string & original_file_name, size_t buf_size, char * existing_memory, @@ -63,6 +63,7 @@ ReadBufferFromFile::ReadBufferFromFile( ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment), file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name) { + fd_ = -1; } diff --git a/src/IO/ReadBufferFromFile.h b/src/IO/ReadBufferFromFile.h index a621553daae..cebda605b21 100644 --- a/src/IO/ReadBufferFromFile.h +++ b/src/IO/ReadBufferFromFile.h @@ -29,7 +29,10 @@ public: char * existing_memory = nullptr, size_t alignment = 0); /// Use pre-opened file descriptor. - ReadBufferFromFile(int fd, const std::string & original_file_name = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + ReadBufferFromFile( + int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object. + const std::string & original_file_name = {}, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0); ~ReadBufferFromFile() override; diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index 4ade2e2c971..aeed4862fba 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -59,7 +59,7 @@ WriteBufferFromFile::WriteBufferFromFile( /// Use pre-opened file descriptor. WriteBufferFromFile::WriteBufferFromFile( - int fd_, + int & fd_, const std::string & original_file_name, size_t buf_size, char * existing_memory, @@ -68,6 +68,7 @@ WriteBufferFromFile::WriteBufferFromFile( WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment), file_name(original_file_name.empty() ? 
"(fd = " + toString(fd_) + ")" : original_file_name) { + fd_ = -1; } diff --git a/src/IO/WriteBufferFromFile.h b/src/IO/WriteBufferFromFile.h index 12092b742e5..77530c323d2 100644 --- a/src/IO/WriteBufferFromFile.h +++ b/src/IO/WriteBufferFromFile.h @@ -39,7 +39,7 @@ public: /// Use pre-opened file descriptor. WriteBufferFromFile( - int fd, + int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object. const std::string & original_file_name = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index bac14acb9cd..1ca8d8988d9 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -56,7 +56,6 @@ WriteBufferFromS3::WriteBufferFromS3( initiate(); } - void WriteBufferFromS3::nextImpl() { if (!offset()) @@ -79,23 +78,31 @@ void WriteBufferFromS3::nextImpl() } } - void WriteBufferFromS3::finalize() { - next(); - - if (is_multipart) - writePart(temporary_buffer->str()); - - complete(); + finalizeImpl(); } +void WriteBufferFromS3::finalizeImpl() +{ + if (!finalized) + { + next(); + + if (is_multipart) + writePart(temporary_buffer->str()); + + complete(); + + finalized = true; + } +} WriteBufferFromS3::~WriteBufferFromS3() { try { - next(); + finalizeImpl(); } catch (...) { @@ -103,7 +110,6 @@ WriteBufferFromS3::~WriteBufferFromS3() } } - void WriteBufferFromS3::initiate() { Aws::S3::Model::CreateMultipartUploadRequest req; diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 93a6947609e..1a1e859d913 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -57,9 +57,13 @@ public: ~WriteBufferFromS3() override; private: + bool finalized = false; + void initiate(); void writePart(const String & data); void complete(); + + void finalizeImpl(); }; } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index be040ff2c34..d41f1c0499f 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -53,6 +53,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_ELEMENT_OF_SET; + extern const int BAD_ARGUMENTS; } static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) @@ -235,11 +236,7 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, co return header.cloneWithColumns(std::move(columns)); } -/** Create a block for set from literal. - * 'set_element_types' - types of what are on the left hand side of IN. - * 'right_arg' - Literal - Tuple or Array. - */ -static Block createBlockForSet( +Block createBlockForSet( const DataTypePtr & left_arg_type, const ASTPtr & right_arg, const DataTypes & set_element_types, @@ -280,14 +277,7 @@ static Block createBlockForSet( return block; } -/** Create a block for set from expression. - * 'set_element_types' - types of what are on the left hand side of IN. - * 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). - * - * We need special implementation for ASTFunction, because in case, when we interpret - * large tuple or array as function, `evaluateConstantExpression` works extremely slow. 
- */ -static Block createBlockForSet( +Block createBlockForSet( const DataTypePtr & left_arg_type, const std::shared_ptr & right_arg, const DataTypes & set_element_types, @@ -339,7 +329,7 @@ static Block createBlockForSet( } SetPtr makeExplicitSet( - const ASTFunction * node, const Block & sample_block, bool create_ordered_set, + const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set, const Context & context, const SizeLimits & size_limits, PreparedSets & prepared_sets) { const IAST & args = *node->arguments; @@ -350,7 +340,11 @@ SetPtr makeExplicitSet( const ASTPtr & left_arg = args.children.at(0); const ASTPtr & right_arg = args.children.at(1); - const DataTypePtr & left_arg_type = sample_block.getByName(left_arg->getColumnName()).type; + const auto & index = actions.getIndex(); + auto it = index.find(left_arg->getColumnName()); + if (it == index.end()) + throw Exception("Unknown identifier: '" + left_arg->getColumnName() + "'", ErrorCodes::UNKNOWN_IDENTIFIER); + const DataTypePtr & left_arg_type = it->second->result_type; DataTypes set_element_types = {left_arg_type}; const auto * left_tuple_type = typeid_cast(left_arg_type.get()); @@ -381,95 +375,145 @@ SetPtr makeExplicitSet( return set; } -ScopeStack::ScopeStack(const ExpressionActionsPtr & actions, const Context & context_) +ActionsMatcher::Data::Data( + const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_, + const NamesAndTypesList & source_columns_, ActionsDAGPtr actions, + PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, + bool no_subqueries_, bool no_makeset_, bool only_consts_, bool no_storage_or_local_) + : context(context_) + , set_size_limit(set_size_limit_) + , subquery_depth(subquery_depth_) + , source_columns(source_columns_) + , prepared_sets(prepared_sets_) + , subqueries_for_sets(subqueries_for_sets_) + , no_subqueries(no_subqueries_) + , no_makeset(no_makeset_) + , only_consts(only_consts_) + , no_storage_or_local(no_storage_or_local_) + , visit_depth(0) + , actions_stack(std::move(actions), context) + , next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1) +{ +} + +bool ActionsMatcher::Data::hasColumn(const String & column_name) const +{ + return actions_stack.getLastActions().getIndex().count(column_name) != 0; +} + +ScopeStack::ScopeStack(ActionsDAGPtr actions, const Context & context_) : context(context_) { - stack.emplace_back(); - stack.back().actions = actions; + auto & level = stack.emplace_back(); + level.actions = std::move(actions); - const Block & sample_block = actions->getSampleBlock(); - for (size_t i = 0, size = sample_block.columns(); i < size; ++i) - stack.back().new_columns.insert(sample_block.getByPosition(i).name); + for (const auto & [name, node] : level.actions->getIndex()) + if (node->type == ActionsDAG::Type::INPUT) + level.inputs.emplace(name); } void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) { - stack.emplace_back(); - Level & prev = stack[stack.size() - 2]; - - ColumnsWithTypeAndName all_columns; - NameSet new_names; + auto & level = stack.emplace_back(); + level.actions = std::make_shared(); + const auto & prev = stack[stack.size() - 2]; for (const auto & input_column : input_columns) { - all_columns.emplace_back(nullptr, input_column.type, input_column.name); - new_names.insert(input_column.name); - stack.back().new_columns.insert(input_column.name); + level.actions->addInput(input_column.name, input_column.type); + level.inputs.emplace(input_column.name); } - const Block & 
prev_sample_block = prev.actions->getSampleBlock(); - for (size_t i = 0, size = prev_sample_block.columns(); i < size; ++i) + const auto & index = level.actions->getIndex(); + + for (const auto & [name, node] : prev.actions->getIndex()) { - const ColumnWithTypeAndName & col = prev_sample_block.getByPosition(i); - if (!new_names.count(col.name)) - all_columns.push_back(col); + if (index.count(name) == 0) + level.actions->addInput({node->column, node->result_type, node->result_name}); } - - stack.back().actions = std::make_shared(all_columns, context); } size_t ScopeStack::getColumnLevel(const std::string & name) { - for (int i = static_cast(stack.size()) - 1; i >= 0; --i) - if (stack[i].new_columns.count(name)) + for (size_t i = stack.size(); i > 0;) + { + --i; + + if (stack[i].inputs.count(name)) return i; + const auto & index = stack[i].actions->getIndex(); + auto it = index.find(name); + + if (it != index.end() && it->second->type != ActionsDAG::Type::INPUT) + return i; + } + throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); } -void ScopeStack::addAction(const ExpressionAction & action) +void ScopeStack::addColumn(ColumnWithTypeAndName column) { - size_t level = 0; - Names required = action.getNeededColumns(); - for (const auto & elem : required) - level = std::max(level, getColumnLevel(elem)); + const auto & node = stack[0].actions->addColumn(std::move(column)); - Names added; - stack[level].actions->add(action, added); - - stack[level].new_columns.insert(added.begin(), added.end()); - - for (const auto & elem : added) - { - const ColumnWithTypeAndName & col = stack[level].actions->getSampleBlock().getByName(elem); - for (size_t j = level + 1; j < stack.size(); ++j) - stack[j].actions->addInput(col); - } + for (size_t j = 1; j < stack.size(); ++j) + stack[j].actions->addInput({node.column, node.result_type, node.result_name}); } -void ScopeStack::addActionNoInput(const ExpressionAction & action) +void ScopeStack::addAlias(const std::string & name, std::string alias) { - size_t level = 0; - Names required = action.getNeededColumns(); - for (const auto & elem : required) - level = std::max(level, getColumnLevel(elem)); + auto level = getColumnLevel(name); + const auto & node = stack[level].actions->addAlias(name, std::move(alias)); - Names added; - stack[level].actions->add(action, added); - - stack[level].new_columns.insert(added.begin(), added.end()); + for (size_t j = level + 1; j < stack.size(); ++j) + stack[j].actions->addInput({node.column, node.result_type, node.result_name}); } -ExpressionActionsPtr ScopeStack::popLevel() +void ScopeStack::addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name) { - ExpressionActionsPtr res = stack.back().actions; + getColumnLevel(source_name); + + if (stack.front().actions->getIndex().count(source_name) == 0) + throw Exception("Expression with arrayJoin cannot depend on lambda argument: " + source_name, + ErrorCodes::BAD_ARGUMENTS); + + const auto & node = stack.front().actions->addArrayJoin(source_name, std::move(result_name), std::move(unique_column_name)); + + for (size_t j = 1; j < stack.size(); ++j) + stack[j].actions->addInput({node.column, node.result_type, node.result_name}); +} + +void ScopeStack::addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name, + bool compile_expressions) +{ + size_t level = 0; + for (const auto & argument : argument_names) + level = std::max(level, getColumnLevel(argument)); 
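/// The new node is created at the shallowest level where every argument is already visible,
/// i.e. the most deeply nested scope owning any of the arguments; all deeper lambda scopes then
/// receive the result as an input column, mirroring addColumn/addAlias/addArrayJoin above.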
+ + const auto & node = stack[level].actions->addFunction(function, argument_names, std::move(result_name), compile_expressions); + + for (size_t j = level + 1; j < stack.size(); ++j) + stack[j].actions->addInput({node.column, node.result_type, node.result_name}); +} + +ActionsDAGPtr ScopeStack::popLevel() +{ + auto res = std::move(stack.back()); stack.pop_back(); - return res; + return res.actions; } -const Block & ScopeStack::getSampleBlock() const +std::string ScopeStack::dumpNames() const { - return stack.back().actions->getSampleBlock(); + return stack.back().actions->dumpNames(); +} + +const ActionsDAG & ScopeStack::getLastActions() const +{ + return *stack.back().actions; } struct CachedColumnName @@ -532,7 +576,7 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr & ast, /// Special check for WITH statement alias. Add alias action to be able to use this alias. if (identifier.prefer_alias_to_column_name && !identifier.alias.empty()) - data.addAction(ExpressionAction::addAliases({{identifier.name, identifier.alias}})); + data.addAlias(identifier.name, identifier.alias); } } @@ -556,14 +600,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (!data.only_consts) { String result_name = column_name.get(ast); - /// Here we copy argument because arrayJoin removes source column. - /// It makes possible to remove source column before arrayJoin if it won't be needed anymore. - - /// It could have been possible to implement arrayJoin which keeps source column, - /// but in this case it will always be replicated (as many arrays), which is expensive. - String tmp_name = data.getUniqueName("_array_join_" + arg->getColumnName()); - data.addActionNoInput(ExpressionAction::copyColumn(arg->getColumnName(), tmp_name)); - data.addAction(ExpressionAction::arrayJoin(tmp_name, result_name)); + data.addArrayJoin(arg->getColumnName(), result_name); } return; @@ -588,10 +625,10 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & auto argument_name = node.arguments->children.at(0)->getColumnName(); - data.addAction(ExpressionAction::applyFunction( + data.addFunction( FunctionFactory::instance().get(node.name + "IgnoreSet", data.context), { argument_name, argument_name }, - column_name.get(ast))); + column_name.get(ast)); } return; } @@ -663,7 +700,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & column.column = ColumnConst::create(std::move(column_set), 1); else column.column = std::move(column_set); - data.addAction(ExpressionAction::addColumn(column)); + data.addColumn(column); } argument_types.push_back(column.type); @@ -679,7 +716,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & ColumnConst::create(std::move(column_string), 1), std::make_shared(), data.getUniqueName("__" + node.name)); - data.addAction(ExpressionAction::addColumn(column)); + data.addColumn(column); argument_types.push_back(column.type); argument_names.push_back(column.name); } @@ -699,9 +736,11 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & child_column_name = as_literal->unique_column_name; } - if (data.hasColumn(child_column_name)) + const auto & index = data.actions_stack.getLastActions().getIndex(); + auto it = index.find(child_column_name); + if (it != index.end()) { - argument_types.push_back(data.getSampleBlock().getByName(child_column_name).type); + argument_types.push_back(it->second->result_type); 
argument_names.push_back(child_column_name); } else @@ -709,7 +748,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (data.only_consts) arguments_present = false; else - throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.getSampleBlock().dumpNames(), + throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.actions_stack.dumpNames(), ErrorCodes::UNKNOWN_IDENTIFIER); } } @@ -746,7 +785,8 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.actions_stack.pushLevel(lambda_arguments); visit(lambda->arguments->children.at(1), data); - ExpressionActionsPtr lambda_actions = data.actions_stack.popLevel(); + auto lambda_dag = data.actions_stack.popLevel(); + auto lambda_actions = lambda_dag->buildExpressions(data.context); String result_name = lambda->arguments->children.at(1)->getColumnName(); lambda_actions->finalize(Names(1, result_name)); @@ -765,7 +805,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & auto function_capture = std::make_unique( lambda_actions, captured, lambda_arguments, result_type, result_name); auto function_capture_adapter = std::make_shared(std::move(function_capture)); - data.addAction(ExpressionAction::applyFunction(function_capture_adapter, captured, lambda_name)); + data.addFunction(function_capture_adapter, captured, lambda_name); argument_types[i] = std::make_shared(lambda_type->getArgumentTypes(), result_type); argument_names[i] = lambda_name; @@ -787,7 +827,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (arguments_present) { - data.addAction(ExpressionAction::applyFunction(function_builder, argument_names, column_name.get(ast))); + data.addFunction(function_builder, argument_names, column_name.get(ast)); } } @@ -802,8 +842,12 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, if (literal.unique_column_name.empty()) { const auto default_name = literal.getColumnName(); - const auto & block = data.getSampleBlock(); - const auto * existing_column = block.findByName(default_name); + const auto & index = data.actions_stack.getLastActions().getIndex(); + const ActionsDAG::Node * existing_column = nullptr; + + auto it = index.find(default_name); + if (it != index.end()) + existing_column = it->second; /* * To approximate CSE, bind all identical literals to a single temporary @@ -839,7 +883,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, column.column = type->createColumnConst(1, value); column.type = type; - data.addAction(ExpressionAction::addColumn(column)); + data.addColumn(std::move(column)); } SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_subqueries) @@ -851,7 +895,6 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su const IAST & args = *node.arguments; const ASTPtr & left_in_operand = args.children.at(0); const ASTPtr & right_in_operand = args.children.at(1); - const Block & sample_block = data.getSampleBlock(); /// If the subquery or table name for SELECT. 
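/// (Editorial note: with the sample block removed, the explicit-enumeration branch further below
/// checks the last ActionsDAG index for the left operand and calls makeExplicitSet, which now
/// resolves the operand's type through that same index.)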
const auto * identifier = right_in_operand->as(); @@ -913,9 +956,11 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su } else { - if (sample_block.has(left_in_operand->getColumnName())) + const auto & last_actions = data.actions_stack.getLastActions(); + const auto & index = last_actions.getIndex(); + if (index.count(left_in_operand->getColumnName()) != 0) /// An explicit enumeration of values in parentheses. - return makeExplicitSet(&node, sample_block, false, data.context, data.set_size_limit, data.prepared_sets); + return makeExplicitSet(&node, last_actions, false, data.context, data.set_size_limit, data.prepared_sets); else return {}; } diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index d8d85f1c0bf..f4da9932163 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -16,11 +16,43 @@ struct ExpressionAction; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; - /// The case of an explicit enumeration of values. +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr; + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +/// The case of an explicit enumeration of values. SetPtr makeExplicitSet( - const ASTFunction * node, const Block & sample_block, bool create_ordered_set, + const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set, const Context & context, const SizeLimits & limits, PreparedSets & prepared_sets); +/** Create a block for set from expression. + * 'set_element_types' - types of what are on the left hand side of IN. + * 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). + * + * We need special implementation for ASTFunction, because in case, when we interpret + * large tuple or array as function, `evaluateConstantExpression` works extremely slow. + * + * Note: this and following functions are used in third-party applications in Arcadia, so + * they should be declared in header file. + * + */ +Block createBlockForSet( + const DataTypePtr & left_arg_type, + const std::shared_ptr & right_arg, + const DataTypes & set_element_types, + const Context & context); + +/** Create a block for set from literal. + * 'set_element_types' - types of what are on the left hand side of IN. + * 'right_arg' - Literal - Tuple or Array. + */ +Block createBlockForSet( + const DataTypePtr & left_arg_type, + const ASTPtr & right_arg, + const DataTypes & set_element_types, + const Context & context); /** For ActionsVisitor * A stack of ExpressionActions corresponding to nested lambda expressions. @@ -33,8 +65,8 @@ struct ScopeStack { struct Level { - ExpressionActionsPtr actions; - NameSet new_columns; + ActionsDAGPtr actions; + NameSet inputs; }; using Levels = std::vector; @@ -43,19 +75,25 @@ struct ScopeStack const Context & context; - ScopeStack(const ExpressionActionsPtr & actions, const Context & context_); + ScopeStack(ActionsDAGPtr actions, const Context & context_); void pushLevel(const NamesAndTypesList & input_columns); size_t getColumnLevel(const std::string & name); - void addAction(const ExpressionAction & action); - /// For arrayJoin() to avoid double columns in the input. 
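/// Editorial note: the untyped addAction/addActionNoInput pair is superseded below by dedicated
/// addColumn/addAlias/addArrayJoin/addFunction entry points, each appending a node to the
/// per-level ActionsDAG instead of mutating an ExpressionActions sample block.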
- void addActionNoInput(const ExpressionAction & action); + void addColumn(ColumnWithTypeAndName column); + void addAlias(const std::string & name, std::string alias); + void addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name); + void addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name, + bool compile_expressions); - ExpressionActionsPtr popLevel(); + ActionsDAGPtr popLevel(); - const Block & getSampleBlock() const; + const ActionsDAG & getLastActions() const; + std::string dumpNames() const; }; class ASTIdentifier; @@ -91,47 +129,38 @@ public: int next_unique_suffix; Data(const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_, - const NamesAndTypesList & source_columns_, const ExpressionActionsPtr & actions, + const NamesAndTypesList & source_columns_, ActionsDAGPtr actions, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, - bool no_subqueries_, bool no_makeset_, bool only_consts_, bool no_storage_or_local_) - : context(context_), - set_size_limit(set_size_limit_), - subquery_depth(subquery_depth_), - source_columns(source_columns_), - prepared_sets(prepared_sets_), - subqueries_for_sets(subqueries_for_sets_), - no_subqueries(no_subqueries_), - no_makeset(no_makeset_), - only_consts(only_consts_), - no_storage_or_local(no_storage_or_local_), - visit_depth(0), - actions_stack(actions, context), - next_unique_suffix(actions_stack.getSampleBlock().columns() + 1) - {} - - void updateActions(ExpressionActionsPtr & actions) - { - actions = actions_stack.popLevel(); - } - - void addAction(const ExpressionAction & action) - { - actions_stack.addAction(action); - } - void addActionNoInput(const ExpressionAction & action) - { - actions_stack.addActionNoInput(action); - } - - const Block & getSampleBlock() const - { - return actions_stack.getSampleBlock(); - } + bool no_subqueries_, bool no_makeset_, bool only_consts_, bool no_storage_or_local_); /// Does result of the calculation already exists in the block. - bool hasColumn(const String & columnName) const + bool hasColumn(const String & column_name) const; + void addColumn(ColumnWithTypeAndName column) { - return actions_stack.getSampleBlock().has(columnName); + actions_stack.addColumn(std::move(column)); + } + + void addAlias(const std::string & name, std::string alias) + { + actions_stack.addAlias(name, std::move(alias)); + } + + void addArrayJoin(const std::string & source_name, std::string result_name) + { + actions_stack.addArrayJoin(source_name, std::move(result_name), getUniqueName("_array_join_" + source_name)); + } + + void addFunction(const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name) + { + actions_stack.addFunction(function, argument_names, std::move(result_name), + context.getSettingsRef().compile_expressions); + } + + ActionsDAGPtr getActions() + { + return actions_stack.popLevel(); } /* @@ -140,12 +169,11 @@ public: */ String getUniqueName(const String & prefix) { - const auto & block = getSampleBlock(); auto result = prefix; // First, try the name without any suffix, because it is currently // used both as a display name and a column id. 
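/// (hasColumn() below now consults the index of the last ActionsDAG level rather than a sample block.)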
- while (block.has(result)) + while (hasColumn(result)) { result = prefix + "_" + toString(next_unique_suffix); ++next_unique_suffix; diff --git a/src/Interpreters/ArrayJoinAction.cpp b/src/Interpreters/ArrayJoinAction.cpp index 176dc8258ce..62064bf3292 100644 --- a/src/Interpreters/ArrayJoinAction.cpp +++ b/src/Interpreters/ArrayJoinAction.cpp @@ -35,11 +35,13 @@ ArrayJoinAction::ArrayJoinAction(const NameSet & array_joined_columns_, bool arr } -void ArrayJoinAction::prepare(Block & sample_block) +void ArrayJoinAction::prepare(ColumnsWithTypeAndName & sample) const { - for (const auto & name : columns) + for (auto & current : sample) { - ColumnWithTypeAndName & current = sample_block.getByName(name); + if (columns.count(current.name) == 0) + continue; + const DataTypeArray * array_type = typeid_cast(&*current.type); if (!array_type) throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); diff --git a/src/Interpreters/ArrayJoinAction.h b/src/Interpreters/ArrayJoinAction.h index 9467e579e62..81f720f00ef 100644 --- a/src/Interpreters/ArrayJoinAction.h +++ b/src/Interpreters/ArrayJoinAction.h @@ -28,7 +28,7 @@ public: FunctionOverloadResolverPtr function_builder; ArrayJoinAction(const NameSet & array_joined_columns_, bool array_join_is_left, const Context & context); - void prepare(Block & sample_block); + void prepare(ColumnsWithTypeAndName & sample) const; void execute(Block & block); }; diff --git a/src/Interpreters/AsteriskSemantic.h b/src/Interpreters/AsteriskSemantic.h deleted file mode 100644 index 1bd9ecedddd..00000000000 --- a/src/Interpreters/AsteriskSemantic.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once - -#include - -#include -#include -#include - -namespace DB -{ - -struct AsteriskSemanticImpl -{ - using RevertedAliases = std::unordered_map>; - using RevertedAliasesPtr = std::shared_ptr; - - RevertedAliasesPtr aliases; /// map of aliases that should be set in phase of * expanding. -}; - - -struct AsteriskSemantic -{ - using RevertedAliases = AsteriskSemanticImpl::RevertedAliases; - using RevertedAliasesPtr = AsteriskSemanticImpl::RevertedAliasesPtr; - - static void setAliases(ASTAsterisk & node, const RevertedAliasesPtr & aliases) { node.semantic = makeSemantic(aliases); } - static void setAliases(ASTQualifiedAsterisk & node, const RevertedAliasesPtr & aliases) { node.semantic = makeSemantic(aliases); } - static void setAliases(ASTColumnsMatcher & node, const RevertedAliasesPtr & aliases) { node.semantic = makeSemantic(aliases); } - - static RevertedAliasesPtr getAliases(const ASTAsterisk & node) { return node.semantic ? node.semantic->aliases : nullptr; } - static RevertedAliasesPtr getAliases(const ASTQualifiedAsterisk & node) { return node.semantic ? node.semantic->aliases : nullptr; } - static RevertedAliasesPtr getAliases(const ASTColumnsMatcher & node) { return node.semantic ? 
node.semantic->aliases : nullptr; } - -private: - static std::shared_ptr makeSemantic(const RevertedAliasesPtr & aliases) - { - return std::make_shared(AsteriskSemanticImpl{aliases}); - } -}; - -} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 704b21f3a4a..68f18ffffe8 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -968,6 +968,7 @@ StoragePtr Context::getViewSource() Settings Context::getSettings() const { + auto lock = getLock(); return settings; } @@ -1088,6 +1089,18 @@ String Context::getInitialQueryId() const } +void Context::setCurrentDatabaseNameInGlobalContext(const String & name) +{ + if (global_context != this) + throw Exception("Cannot set current database for non global context, this method should be used during server initialization", ErrorCodes::LOGICAL_ERROR); + auto lock = getLock(); + + if (!current_database.empty()) + throw Exception("Default database name cannot be changed in global context without server restart", ErrorCodes::LOGICAL_ERROR); + + current_database = name; +} + void Context::setCurrentDatabase(const String & name) { DatabaseCatalog::instance().assertDatabaseExists(name); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 3d66ef239e7..bd5e17fe2e4 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -359,6 +359,9 @@ public: String getInitialQueryId() const; void setCurrentDatabase(const String & name); + /// Set current_database for global context. We don't validate that database + /// exists because it should be set before databases loading. + void setCurrentDatabaseNameInGlobalContext(const String & name); void setCurrentQueryId(const String & query_id); void killCurrentQuery(); diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 5b346eec54a..3591c7768ee 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -910,7 +910,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( String executed_by; zkutil::EventPtr event = std::make_shared(); - if (zookeeper->tryGet(is_executed_path, executed_by)) + if (zookeeper->tryGet(is_executed_path, executed_by, nullptr, event)) { LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, executed_by); return true; @@ -961,6 +961,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( if (event->tryWait(std::uniform_int_distribution(0, 1000)(rng))) { + LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path)); executed_by_leader = true; break; } diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 049341918b9..203e2292c08 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -701,6 +701,7 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, 0}); else tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time}); + tables_marked_dropped_ids.insert(table_id.uuid); /// If list of dropped tables was empty, start a drop task if (drop_task && tables_marked_dropped.size() == 1) (*drop_task)->schedule(); @@ -742,6 +743,9 @@ void DatabaseCatalog::dropTableDataTask() try { dropTableFinally(table); + std::lock_guard lock(tables_marked_dropped_mutex); + [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(table.table_id.uuid); + 
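/// Editorial note: erasing the UUID while holding tables_marked_dropped_mutex pairs with the
/// wait_table_finally_dropped.notify_all() further below, so waitTableFinallyDropped() returns
/// only after dropTableFinally() has actually removed the table.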
assert(removed); } catch (...) { @@ -755,6 +759,8 @@ void DatabaseCatalog::dropTableDataTask() need_reschedule = true; } } + + wait_table_finally_dropped.notify_all(); } /// Do not schedule a task if there is no tables to drop @@ -814,6 +820,17 @@ String DatabaseCatalog::resolveDictionaryName(const String & name) const return toString(db_and_table.second->getStorageID().uuid); } +void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) +{ + if (uuid == UUIDHelpers::Nil) + return; + std::unique_lock lock{tables_marked_dropped_mutex}; + wait_table_finally_dropped.wait(lock, [&]() + { + return tables_marked_dropped_ids.count(uuid) == 0; + }); +} + DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem) : map(map_), db_mutex(db_mutex_), guards_lock(std::move(guards_lock_)) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 8ef3ecfe656..7bc6923bde4 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -179,6 +180,8 @@ public: /// Try convert qualified dictionary name to persistent UUID String resolveDictionaryName(const String & name) const; + void waitTableFinallyDropped(const UUID & uuid); + private: // The global instance of database catalog. unique_ptr is to allow // deferred initialization. Thought I'd use std::optional, but I can't @@ -249,11 +252,13 @@ private: mutable std::mutex ddl_guards_mutex; TablesMarkedAsDropped tables_marked_dropped; + std::unordered_set tables_marked_dropped_ids; mutable std::mutex tables_marked_dropped_mutex; std::unique_ptr drop_task; static constexpr time_t default_drop_delay_sec = 8 * 60; time_t drop_delay_sec = default_drop_delay_sec; + std::condition_variable wait_table_finally_dropped; }; } diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 0c287e4026d..36c08d945eb 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -13,8 +13,10 @@ #include #include #include +#include #include #include +#include #if !defined(ARCADIA_BUILD) # include "config_core.h" @@ -186,7 +188,8 @@ void ExpressionAction::prepare(Block & sample_block, const Settings & settings, size_t result_position = sample_block.columns(); sample_block.insert({nullptr, result_type, result_name}); - function = function_base->prepare(sample_block, arguments, result_position); + if (!function) + function = function_base->prepare(sample_block, arguments, result_position); function->createLowCardinalityResultCache(settings.max_threads); bool compile_expressions = false; @@ -198,7 +201,10 @@ void ExpressionAction::prepare(Block & sample_block, const Settings & settings, /// so we don't want to unfold non deterministic functions if (all_const && function_base->isSuitableForConstantFolding() && (!compile_expressions || function_base->isDeterministic())) { - function->execute(sample_block, arguments, result_position, sample_block.rows(), true); + if (added_column) + sample_block.getByPosition(result_position).column = added_column; + else + function->execute(sample_block, arguments, result_position, sample_block.rows(), true); /// If the result is not a constant, just in case, we will consider the result as unknown. 
ColumnWithTypeAndName & col = sample_block.safeGetByPosition(result_position); @@ -586,8 +592,11 @@ void ExpressionActions::addImpl(ExpressionAction action, Names & new_names) arguments[i] = sample_block.getByName(action.argument_names[i]); } - action.function_base = action.function_builder->build(arguments); - action.result_type = action.function_base->getReturnType(); + if (!action.function_base) + { + action.function_base = action.function_builder->build(arguments); + action.result_type = action.function_base->getReturnType(); + } } if (action.type == ExpressionAction::ADD_ALIASES) @@ -1256,8 +1265,14 @@ void ExpressionActionsChain::addStep() if (steps.empty()) throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); + if (auto * step = typeid_cast(steps.back().get())) + { + if (!step->actions) + step->actions = step->actions_dag->buildExpressions(context); + } + ColumnsWithTypeAndName columns = steps.back()->getResultColumns(); - steps.push_back(std::make_unique(std::make_shared(columns, context))); + steps.push_back(std::make_unique(std::make_shared(columns))); } void ExpressionActionsChain::finalize() @@ -1404,14 +1419,383 @@ void ExpressionActionsChain::JoinStep::finalize(const Names & required_output_) std::swap(result_columns, new_result_columns); } -ExpressionActionsPtr & ExpressionActionsChain::Step::actions() +ActionsDAGPtr & ExpressionActionsChain::Step::actions() { - return typeid_cast(this)->actions; + return typeid_cast(this)->actions_dag; } -const ExpressionActionsPtr & ExpressionActionsChain::Step::actions() const +const ActionsDAGPtr & ExpressionActionsChain::Step::actions() const +{ + return typeid_cast(this)->actions_dag; +} + +ExpressionActionsPtr ExpressionActionsChain::Step::getExpression() const { return typeid_cast(this)->actions; } +ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs) +{ + for (const auto & input : inputs) + addInput(input.name, input.type); +} + +ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs) +{ + for (const auto & input : inputs) + addInput(input); +} + +ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace) +{ + auto it = index.find(node.result_name); + if (it != index.end() && !can_replace) + throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); + + auto & res = nodes.emplace_back(std::move(node)); + + if (it != index.end()) + it->second->renaming_parent = &res; + + index[res.result_name] = &res; + return res; +} + +ActionsDAG::Node & ActionsDAG::getNode(const std::string & name) +{ + auto it = index.find(name); + if (it == index.end()) + throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); + + return *it->second; +} + +const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type) +{ + Node node; + node.type = Type::INPUT; + node.result_type = std::move(type); + node.result_name = std::move(name); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column) +{ + Node node; + node.type = Type::INPUT; + node.result_type = std::move(column.type); + node.result_name = std::move(column.name); + node.column = std::move(column.column); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column) +{ + if (!column.column) + throw Exception("Cannot add column " + column.name + " because it is nullptr", ErrorCodes::LOGICAL_ERROR); + + Node node; + node.type = 
Type::COLUMN; + node.result_type = std::move(column.type); + node.result_name = std::move(column.name); + node.column = std::move(column.column); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace) +{ + auto & child = getNode(name); + + Node node; + node.type = Type::ALIAS; + node.result_type = child.result_type; + node.result_name = std::move(alias); + node.column = child.column; + node.allow_constant_folding = child.allow_constant_folding; + node.children.emplace_back(&child); + + return addNode(std::move(node), can_replace); +} + +const ActionsDAG::Node & ActionsDAG::addArrayJoin( + const std::string & source_name, std::string result_name, std::string unique_column_name) +{ + auto & child = getNode(source_name); + + const DataTypeArray * array_type = typeid_cast(child.result_type.get()); + if (!array_type) + throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); + + Node node; + node.type = Type::ARRAY_JOIN; + node.result_type = array_type->getNestedType(); + node.result_name = std::move(result_name); + node.unique_column_name_for_array_join = std::move(unique_column_name); + node.children.emplace_back(&child); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name, + bool compile_expressions [[maybe_unused]]) +{ + size_t num_arguments = argument_names.size(); + + Node node; + node.type = Type::FUNCTION; + node.function_builder = function; + node.children.reserve(num_arguments); + + bool all_const = true; + ColumnsWithTypeAndName arguments(num_arguments); + ColumnNumbers argument_numbers(num_arguments); + + for (size_t i = 0; i < num_arguments; ++i) + { + auto & child = getNode(argument_names[i]); + node.children.emplace_back(&child); + node.allow_constant_folding = node.allow_constant_folding && child.allow_constant_folding; + + ColumnWithTypeAndName argument; + argument.column = child.column; + argument.type = child.result_type; + + if (!argument.column || !isColumnConst(*argument.column)) + all_const = false; + + arguments[i] = std::move(argument); + argument_numbers[i] = i; + } + + node.function_base = function->build(arguments); + node.result_type = node.function_base->getReturnType(); + + Block sample_block(std::move(arguments)); + sample_block.insert({nullptr, node.result_type, node.result_name}); + node.function = node.function_base->prepare(sample_block, argument_numbers, num_arguments); + + bool do_compile_expressions = false; +#if USE_EMBEDDED_COMPILER + do_compile_expressions = compile_expressions; +#endif + /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. + /// But if we compile expressions compiled version of this function maybe placed in cache, + /// so we don't want to unfold non deterministic functions + if (all_const && node.function_base->isSuitableForConstantFolding() && (!do_compile_expressions || node.function_base->isDeterministic())) + { + node.function->execute(sample_block, argument_numbers, num_arguments, sample_block.rows(), true); + + /// If the result is not a constant, just in case, we will consider the result as unknown. + ColumnWithTypeAndName & col = sample_block.safeGetByPosition(num_arguments); + if (isColumnConst(*col.column)) + { + /// All constant (literal) columns in block are added with size 1. 
+ /// But if there was no columns in block before executing a function, the result has size 0. + /// Change the size to 1. + + if (col.column->empty()) + col.column = col.column->cloneResized(1); + + node.column = std::move(col.column); + } + } + + /// Some functions like ignore() or getTypeName() always return constant result even if arguments are not constant. + /// We can't do constant folding, but can specify in sample block that function result is constant to avoid + /// unnecessary materialization. + if (!node.column && node.function_base->isSuitableForConstantFolding()) + { + if (auto col = node.function_base->getResultIfAlwaysReturnsConstantAndHasArguments(sample_block, argument_numbers)) + { + node.column = std::move(col); + node.allow_constant_folding = false; + } + } + + if (result_name.empty()) + { + result_name = function->getName() + "("; + for (size_t i = 0; i < argument_names.size(); ++i) + { + if (i) + result_name += ", "; + result_name += argument_names[i]; + } + result_name += ")"; + } + + node.result_name = std::move(result_name); + + return addNode(std::move(node)); +} + +ColumnsWithTypeAndName ActionsDAG::getResultColumns() const +{ + ColumnsWithTypeAndName result; + result.reserve(index.size()); + for (const auto & node : nodes) + if (!node.renaming_parent) + result.emplace_back(node.column, node.result_type, node.result_name); + + return result; +} + +NamesAndTypesList ActionsDAG::getNamesAndTypesList() const +{ + NamesAndTypesList result; + for (const auto & node : nodes) + if (!node.renaming_parent) + result.emplace_back(node.result_name, node.result_type); + + return result; +} + +Names ActionsDAG::getNames() const +{ + Names names; + names.reserve(index.size()); + for (const auto & node : nodes) + if (!node.renaming_parent) + names.emplace_back(node.result_name); + + return names; +} + +std::string ActionsDAG::dumpNames() const +{ + WriteBufferFromOwnString out; + for (auto it = nodes.begin(); it != nodes.end(); ++it) + { + if (it != nodes.begin()) + out << ", "; + out << it->result_name; + } + return out.str(); +} + +ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) +{ + struct Data + { + Node * node = nullptr; + size_t num_created_children = 0; + size_t num_expected_children = 0; + std::vector parents; + Node * renamed_child = nullptr; + }; + + std::vector data(nodes.size()); + std::unordered_map reverse_index; + + for (auto & node : nodes) + { + size_t id = reverse_index.size(); + data[id].node = &node; + reverse_index[&node] = id; + } + + std::queue ready_nodes; + std::queue ready_array_joins; + + for (auto & node : nodes) + { + data[reverse_index[&node]].num_expected_children += node.children.size(); + + for (const auto & child : node.children) + data[reverse_index[child]].parents.emplace_back(&node); + + if (node.renaming_parent) + { + + auto & cur = data[reverse_index[node.renaming_parent]]; + cur.renamed_child = &node; + cur.num_expected_children += 1; + } + } + + for (auto & node : nodes) + { + if (node.children.empty() && data[reverse_index[&node]].renamed_child == nullptr) + ready_nodes.emplace(&node); + } + + auto update_parent = [&](Node * parent) + { + auto & cur = data[reverse_index[parent]]; + ++cur.num_created_children; + + if (cur.num_created_children == cur.num_expected_children) + { + auto & push_stack = parent->type == Type::ARRAY_JOIN ? 
ready_array_joins : ready_nodes; + push_stack.push(parent); + } + }; + + auto expressions = std::make_shared(NamesAndTypesList(), context); + + while (!ready_nodes.empty() || !ready_array_joins.empty()) + { + auto & stack = ready_nodes.empty() ? ready_array_joins : ready_nodes; + Node * node = stack.front(); + stack.pop(); + + Names argument_names; + for (const auto & child : node->children) + argument_names.emplace_back(child->result_name); + + auto & cur = data[reverse_index[node]]; + + switch (node->type) + { + case Type::INPUT: + expressions->addInput({node->column, node->result_type, node->result_name}); + break; + case Type::COLUMN: + expressions->add(ExpressionAction::addColumn({node->column, node->result_type, node->result_name})); + break; + case Type::ALIAS: + expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->result_name, cur.renamed_child != nullptr)); + break; + case Type::ARRAY_JOIN: + /// Here we copy argument because arrayJoin removes source column. + /// It makes possible to remove source column before arrayJoin if it won't be needed anymore. + + /// It could have been possible to implement arrayJoin which keeps source column, + /// but in this case it will always be replicated (as many arrays), which is expensive. + expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->unique_column_name_for_array_join)); + expressions->add(ExpressionAction::arrayJoin(node->unique_column_name_for_array_join, node->result_name)); + break; + case Type::FUNCTION: + { + ExpressionAction action; + action.type = ExpressionAction::APPLY_FUNCTION; + action.result_name = node->result_name; + action.result_type = node->result_type; + action.function_builder = node->function_builder; + action.function_base = node->function_base; + action.function = node->function; + action.argument_names = std::move(argument_names); + action.added_column = node->column; + + expressions->add(action); + break; + } + } + + for (const auto & parent : cur.parents) + update_parent(parent); + + if (node->renaming_parent) + update_parent(node->renaming_parent); + } + + return expressions; +} + } diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 0607bc1e055..b35f8972c97 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -140,6 +140,89 @@ private: class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; +class ActionsDAG +{ +public: + + enum class Type + { + /// Column which must be in input. + INPUT, + /// Constant column with known value. + COLUMN, + /// Another one name for column. + ALIAS, + /// Function arrayJoin. Specially separated because it changes the number of rows. + ARRAY_JOIN, + FUNCTION, + }; + + struct Node + { + std::vector children; + /// This field is filled if current node is replaced by existing node with the same name. + Node * renaming_parent = nullptr; + + Type type; + + std::string result_name; + DataTypePtr result_type; + + std::string unique_column_name_for_array_join; + + FunctionOverloadResolverPtr function_builder; + /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity. + FunctionBasePtr function_base; + /// Prepared function which is used in function execution. + ExecutableFunctionPtr function; + + /// For COLUMN node and propagated constants. + ColumnPtr column; + /// Some functions like `ignore()` always return constant but can't be replaced by constant it. 
+ /// We calculate such constants in order to avoid unnecessary materialization, but prohibit it's folding. + bool allow_constant_folding = true; + }; + + using Index = std::unordered_map; + +private: + std::list nodes; + Index index; + +public: + ActionsDAG() = default; + ActionsDAG(const ActionsDAG &) = delete; + ActionsDAG & operator=(const ActionsDAG &) = delete; + ActionsDAG(const NamesAndTypesList & inputs); + ActionsDAG(const ColumnsWithTypeAndName & inputs); + + const Index & getIndex() const { return index; } + + ColumnsWithTypeAndName getResultColumns() const; + NamesAndTypesList getNamesAndTypesList() const; + Names getNames() const; + std::string dumpNames() const; + + const Node & addInput(std::string name, DataTypePtr type); + const Node & addInput(ColumnWithTypeAndName column); + const Node & addColumn(ColumnWithTypeAndName column); + const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false); + const Node & addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name); + const Node & addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name, + bool compile_expressions); + + ExpressionActionsPtr buildExpressions(const Context & context); + +private: + Node & addNode(Node node, bool can_replace = false); + Node & getNode(const std::string & name); +}; + +using ActionsDAGPtr = std::shared_ptr; + /** Contains a sequence of actions on the block. */ class ExpressionActions @@ -287,17 +370,19 @@ struct ExpressionActionsChain virtual std::string dump() const = 0; /// Only for ExpressionActionsStep - ExpressionActionsPtr & actions(); - const ExpressionActionsPtr & actions() const; + ActionsDAGPtr & actions(); + const ActionsDAGPtr & actions() const; + ExpressionActionsPtr getExpression() const; }; struct ExpressionActionsStep : public Step { + ActionsDAGPtr actions_dag; ExpressionActionsPtr actions; - explicit ExpressionActionsStep(ExpressionActionsPtr actions_, Names required_output_ = Names()) + explicit ExpressionActionsStep(ActionsDAGPtr actions_, Names required_output_ = Names()) : Step(std::move(required_output_)) - , actions(std::move(actions_)) + , actions_dag(std::move(actions_)) { } @@ -382,7 +467,9 @@ struct ExpressionActionsChain throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); } - return steps.back()->actions(); + auto * step = typeid_cast(steps.back().get()); + step->actions = step->actions_dag->buildExpressions(context); + return step->actions; } Step & getLastStep() @@ -396,7 +483,7 @@ struct ExpressionActionsChain Step & lastStep(const NamesAndTypesList & columns) { if (steps.empty()) - steps.emplace_back(std::make_unique(std::make_shared(columns, context))); + steps.emplace_back(std::make_unique(std::make_shared(columns))); return *steps.back(); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 2ca183ff9af..2f0dee58141 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -153,38 +153,51 @@ void ExpressionAnalyzer::analyzeAggregation() auto * select_query = query->as(); - ExpressionActionsPtr temp_actions = std::make_shared(sourceColumns(), context); + auto temp_actions = std::make_shared(sourceColumns()); if (select_query) { NamesAndTypesList array_join_columns; + columns_after_array_join = sourceColumns(); bool is_array_join_left; if (ASTPtr array_join_expression_list = 
select_query->arrayJoinExpressionList(is_array_join_left)) { getRootActionsNoMakeSet(array_join_expression_list, true, temp_actions, false); - if (auto array_join = addMultipleArrayJoinAction(temp_actions, is_array_join_left)) + + auto array_join = addMultipleArrayJoinAction(temp_actions, is_array_join_left); + auto sample_columns = temp_actions->getResultColumns(); + array_join->prepare(sample_columns); + temp_actions = std::make_shared(sample_columns); + + NamesAndTypesList new_columns_after_array_join; + NameSet added_columns; + + for (auto & column : temp_actions->getResultColumns()) { - auto sample_block = temp_actions->getSampleBlock(); - array_join->prepare(sample_block); - temp_actions = std::make_shared(sample_block.getColumnsWithTypeAndName(), context); + if (syntax->array_join_result_to_source.count(column.name)) + { + new_columns_after_array_join.emplace_back(column.name, column.type); + added_columns.emplace(column.name); + } } - for (auto & column : temp_actions->getSampleBlock().getNamesAndTypesList()) - if (syntax->array_join_result_to_source.count(column.name)) - array_join_columns.emplace_back(column); + for (auto & column : columns_after_array_join) + if (added_columns.count(column.name) == 0) + new_columns_after_array_join.emplace_back(column.name, column.type); + + columns_after_array_join.swap(new_columns_after_array_join); } - columns_after_array_join = sourceColumns(); columns_after_array_join.insert(columns_after_array_join.end(), array_join_columns.begin(), array_join_columns.end()); const ASTTablesInSelectQueryElement * join = select_query->join(); if (join) { getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, temp_actions, false); - auto sample_columns = temp_actions->getSampleBlock().getColumnsWithTypeAndName(); + auto sample_columns = temp_actions->getResultColumns(); analyzedJoin().addJoinedColumnsAndCorrectNullability(sample_columns); - temp_actions = std::make_shared(sample_columns, context); + temp_actions = std::make_shared(sample_columns); } columns_after_join = columns_after_array_join; @@ -212,15 +225,16 @@ void ExpressionAnalyzer::analyzeAggregation() getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false); const auto & column_name = group_asts[i]->getColumnName(); - const auto & block = temp_actions->getSampleBlock(); + const auto & index = temp_actions->getIndex(); - if (!block.has(column_name)) + auto it = index.find(column_name); + if (it == index.end()) throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); - const auto & col = block.getByName(column_name); + const auto & node = it->second; /// Constant expressions have non-null column pointer at this stage. - if (col.column && isColumnConst(*col.column)) + if (node->column && isColumnConst(*node->column)) { /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. if (!aggregate_descriptions.empty() || size > 1) @@ -235,7 +249,7 @@ void ExpressionAnalyzer::analyzeAggregation() } } - NameAndTypePair key{column_name, col.type}; + NameAndTypePair key{column_name, node->result_type}; /// Aggregation keys are uniqued. 
if (!unique_keys.count(key.name)) @@ -256,14 +270,14 @@ void ExpressionAnalyzer::analyzeAggregation() } } else - aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList(); + aggregated_columns = temp_actions->getNamesAndTypesList(); for (const auto & desc : aggregate_descriptions) aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType()); } else { - aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList(); + aggregated_columns = temp_actions->getNamesAndTypesList(); } } @@ -362,12 +376,11 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } else { - ExpressionActionsPtr temp_actions = std::make_shared(columns_after_join, context); + auto temp_actions = std::make_shared(columns_after_join); getRootActions(left_in_operand, true, temp_actions); - Block sample_block_with_calculated_columns = temp_actions->getSampleBlock(); - if (sample_block_with_calculated_columns.has(left_in_operand->getColumnName())) - makeExplicitSet(func, sample_block_with_calculated_columns, true, context, + if (temp_actions->getIndex().count(left_in_operand->getColumnName()) != 0) + makeExplicitSet(func, *temp_actions, true, context, settings.size_limits_for_set, prepared_sets); } } @@ -375,29 +388,29 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth, - sourceColumns(), actions, prepared_sets, subqueries_for_sets, + sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, no_subqueries, false, only_consts, !isRemoteStorage()); ActionsVisitor(visitor_data, log.stream()).visit(ast); - visitor_data.updateActions(actions); + actions = visitor_data.getActions(); } -void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth, - sourceColumns(), actions, prepared_sets, subqueries_for_sets, + sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, no_subqueries, true, only_consts, !isRemoteStorage()); ActionsVisitor(visitor_data, log.stream()).visit(ast); - visitor_data.updateActions(actions); + actions = visitor_data.getActions(); } -bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions) +bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions) { for (const ASTFunction * node : aggregates()) { @@ -412,7 +425,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & action { getRootActionsNoMakeSet(arguments[i], true, actions); const std::string & name = arguments[i]->getColumnName(); - types[i] = actions->getSampleBlock().getByName(name).type; + types[i] = actions->getIndex().find(name)->second->result_type; aggregate.argument_names[i] = name; } @@ -443,14 +456,14 @@ const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() cons } /// "Big" ARRAY JOIN. 
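/// The next hunk migrates addMultipleArrayJoinAction() to the ActionsDAG interface declared in
/// ExpressionActions.h above. A minimal sketch of that interface, assuming only the signatures
/// shown in this patch and the usual factory/data-type headers; the column name "x", the "plus"
/// function and the helper name are illustrative, not part of the change:
static ExpressionActionsPtr buildExampleDag(const Context & context)
{
    auto dag = std::make_shared<ActionsDAG>(NamesAndTypesList{{"x", std::make_shared<DataTypeUInt64>()}});
    dag->addAlias("x", "x_copy");                                   /// ALIAS node on top of the INPUT node
    auto plus = FunctionFactory::instance().get("plus", context);   /// FunctionOverloadResolverPtr
    dag->addFunction(plus, {"x", "x_copy"}, "plus(x, x_copy)", /* compile_expressions = */ false);
    return dag->buildExpressions(context);                          /// linearize the DAG into ExpressionActions
}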
-ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool array_join_is_left) const +ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool array_join_is_left) const { NameSet result_columns; for (const auto & result_source : syntax->array_join_result_to_source) { /// Assign new names to columns, if needed. if (result_source.first != result_source.second) - actions->add(ExpressionAction::copyColumn(result_source.second, result_source.first)); + actions->addAlias(result_source.second, result_source.first); /// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names. result_columns.insert(result_source.first); @@ -472,8 +485,8 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi getRootActions(array_join_expression_list, only_types, step.actions()); - before_array_join = chain.getLastActions(); auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left); + before_array_join = chain.getLastActions(); chain.steps.push_back(std::make_unique( array_join, step.getResultColumns())); @@ -615,13 +628,14 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQuer return subquery_for_join.join; } -bool SelectQueryExpressionAnalyzer::appendPrewhere( +ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendPrewhere( ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns) { const auto * select_query = getSelectQuery(); + ExpressionActionsPtr prewhere_actions; if (!select_query->prewhere()) - return false; + return prewhere_actions; auto & step = chain.lastStep(sourceColumns()); getRootActions(select_query->prewhere(), only_types, step.actions()); @@ -629,15 +643,16 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( step.required_output.push_back(prewhere_column_name); step.can_remove_required_output.push_back(true); - auto filter_type = step.actions()->getSampleBlock().getByName(prewhere_column_name).type; + auto filter_type = step.actions()->getIndex().find(prewhere_column_name)->second->result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); { /// Remove unused source_columns from prewhere actions. 
- auto tmp_actions = std::make_shared(sourceColumns(), context); - getRootActions(select_query->prewhere(), only_types, tmp_actions); + auto tmp_actions_dag = std::make_shared(sourceColumns()); + getRootActions(select_query->prewhere(), only_types, tmp_actions_dag); + auto tmp_actions = tmp_actions_dag->buildExpressions(context); tmp_actions->finalize({prewhere_column_name}); auto required_columns = tmp_actions->getRequiredColumns(); NameSet required_source_columns(required_columns.begin(), required_columns.end()); @@ -653,7 +668,7 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( } } - auto names = step.actions()->getSampleBlock().getNames(); + auto names = step.actions()->getNames(); NameSet name_set(names.begin(), names.end()); for (const auto & column : sourceColumns()) @@ -661,7 +676,8 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( name_set.erase(column.name); Names required_output(name_set.begin(), name_set.end()); - step.actions()->finalize(required_output); + prewhere_actions = chain.getLastActions(); + prewhere_actions->finalize(required_output); } { @@ -672,8 +688,8 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( /// 2. Store side columns which were calculated during prewhere actions execution if they are used. /// Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step. /// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN. - ColumnsWithTypeAndName columns = step.actions()->getSampleBlock().getColumnsWithTypeAndName(); - auto required_columns = step.actions()->getRequiredColumns(); + ColumnsWithTypeAndName columns = prewhere_actions->getSampleBlock().getColumnsWithTypeAndName(); + auto required_columns = prewhere_actions->getRequiredColumns(); NameSet prewhere_input_names(required_columns.begin(), required_columns.end()); NameSet unused_source_columns; @@ -687,11 +703,13 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( } chain.steps.emplace_back(std::make_unique( - std::make_shared(std::move(columns), context))); + std::make_shared(std::move(columns)))); chain.steps.back()->additional_input = std::move(unused_source_columns); + chain.getLastActions(); + chain.addStep(); } - return true; + return prewhere_actions; } void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name) @@ -699,7 +717,8 @@ void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsCha ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); // FIXME: assert(filter_info); - step.actions() = std::move(actions); + auto * expression_step = typeid_cast(&step); + expression_step->actions = std::move(actions); step.required_output.push_back(std::move(column_name)); step.can_remove_required_output = {true}; @@ -721,7 +740,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); - auto filter_type = step.actions()->getSampleBlock().getByName(where_column_name).type; + auto filter_type = step.actions()->getIndex().find(where_column_name)->second->result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); @@ -750,8 +769,9 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain { for (auto & child : asts) { - 
group_by_elements_actions.emplace_back(std::make_shared(columns_after_join, context)); - getRootActions(child, only_types, group_by_elements_actions.back()); + auto actions_dag = std::make_shared(columns_after_join); + getRootActions(child, only_types, actions_dag); + group_by_elements_actions.emplace_back(actions_dag->buildExpressions(context)); } } @@ -838,8 +858,9 @@ bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain { for (auto & child : select_query->orderBy()->children) { - order_by_elements_actions.emplace_back(std::make_shared(columns_after_join, context)); - getRootActions(child, only_types, order_by_elements_actions.back()); + auto actions_dag = std::make_shared(columns_after_join); + getRootActions(child, only_types, actions_dag); + order_by_elements_actions.emplace_back(actions_dag->buildExpressions(context)); } } return true; @@ -873,7 +894,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain return true; } -void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const +ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const { const auto * select_query = getSelectQuery(); @@ -919,7 +940,9 @@ void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & } } - step.actions()->add(ExpressionAction::project(result_columns)); + auto actions = chain.getLastActions(); + actions->add(ExpressionAction::project(result_columns)); + return actions; } @@ -933,7 +956,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result) { - ExpressionActionsPtr actions = std::make_shared(aggregated_columns, context); + auto actions_dag = std::make_shared(aggregated_columns); NamesWithAliases result_columns; Names result_names; @@ -954,9 +977,11 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje alias = name; result_columns.emplace_back(name, alias); result_names.push_back(alias); - getRootActions(ast, false, actions); + getRootActions(ast, false, actions_dag); } + auto actions = actions_dag->buildExpressions(context); + if (add_aliases) { if (project_result) @@ -980,10 +1005,10 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje ExpressionActionsPtr ExpressionAnalyzer::getConstActions() { - ExpressionActionsPtr actions = std::make_shared(NamesAndTypesList(), context); + auto actions = std::make_shared(NamesAndTypesList()); getRootActions(query, true, actions, true); - return actions; + return actions->buildExpressions(context); } ExpressionActionsPtr SelectQueryExpressionAnalyzer::simpleSelectActions() @@ -1064,10 +1089,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( query_analyzer.appendPreliminaryFilter(chain, filter_info->actions, filter_info->column_name); } - if (query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere)) + if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere)) { - prewhere_info = std::make_shared( - chain.steps.front()->actions(), query.prewhere()->getColumnName()); + prewhere_info = std::make_shared(actions, query.prewhere()->getColumnName()); if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings)) { @@ -1081,7 +1105,6 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( prewhere_constant_filter_description = 
ConstantFilterDescription(*column_elem.column); } } - chain.addStep(); } array_join = query_analyzer.appendArrayJoin(chain, before_array_join, only_types || !first_stage); @@ -1167,8 +1190,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.addStep(); } - query_analyzer.appendProjectResult(chain); - final_projection = chain.getLastActions(); + final_projection = query_analyzer.appendProjectResult(chain); finalize_chain(chain); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index cbfebafa439..0790c9f9bfb 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -37,6 +37,9 @@ using StorageMetadataPtr = std::shared_ptr; class ArrayJoinAction; using ArrayJoinActionPtr = std::shared_ptr; +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr; + /// Create columns in block or return false if not possible bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false); @@ -137,15 +140,15 @@ protected: /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. void initGlobalSubqueriesAndExternalTables(bool do_global); - ArrayJoinActionPtr addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const; + ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false); + void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the * prepared sets would not be applicable for MergeTree index optimization. */ - void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false); + void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, * Create a set of columns aggregated_columns resulting after the aggregation, if any, @@ -153,7 +156,7 @@ protected: * Set has_aggregation = true if there is GROUP BY or at least one aggregate function. */ void analyzeAggregation(); - bool makeAggregateDescriptions(ExpressionActionsPtr & actions); + bool makeAggregateDescriptions(ActionsDAGPtr & actions); const ASTSelectQuery * getSelectQuery() const; @@ -267,7 +270,7 @@ public: /// These appends are public only for tests void appendSelect(ExpressionActionsChain & chain, bool only_types); /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. - void appendProjectResult(ExpressionActionsChain & chain) const; + ExpressionActionsPtr appendProjectResult(ExpressionActionsChain & chain) const; private: StorageMetadataPtr metadata_snapshot; @@ -317,7 +320,7 @@ private: void appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name); /// remove_filter is set in ExpressionActionsChain::finalize(); /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). 
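    /// appendPrewhere() now returns the built PREWHERE actions (nullptr when the query has no
    /// PREWHERE clause); ExpressionAnalysisResult constructs PrewhereInfo from that return value
    /// instead of reaching into chain.steps.front()->actions(), as the hunk above shows.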
- bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); + ExpressionActionsPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); bool appendWhere(ExpressionActionsChain & chain, bool only_types); bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &); void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index f661ec2ae71..256a3784c77 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -8,7 +8,7 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; + extern const int SYNTAX_ERROR; extern const int AMBIGUOUS_COLUMN_NAME; } @@ -147,7 +147,7 @@ std::optional IdentifierSemantic::chooseTableColumnMatch(const ASTIdenti StorageID IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier) { if (identifier.name_parts.size() > 2) - throw Exception("Logical error: more than two components in table expression", ErrorCodes::LOGICAL_ERROR); + throw Exception("Syntax error: more than two components in table expression", ErrorCodes::SYNTAX_ERROR); if (identifier.name_parts.size() == 2) return { identifier.name_parts[0], identifier.name_parts[1], identifier.uuid }; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 6f318b3658a..1b9c2ca3431 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -888,7 +888,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, cons { String zk_path = create.storage->engine->arguments->children[0]->as()->value.get(); Macros::MacroExpansionInfo info; - info.uuid = create.uuid; + info.table_id.uuid = create.uuid; info.ignore_unknown = true; context.getMacros()->expand(zk_path, info); if (!info.expanded_uuid) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index dec6a275872..c70431e5238 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -41,6 +41,9 @@ BlockIO InterpreterDropQuery::execute() if (!drop.cluster.empty()) return executeDDLQueryOnCluster(query_ptr, context, getRequiredAccessForDDLOnCluster()); + if (context.getSettingsRef().database_atomic_wait_for_drop_and_detach_synchronously) + drop.no_delay = true; + if (!drop.table.empty()) { if (!drop.is_dictionary) @@ -124,6 +127,19 @@ BlockIO InterpreterDropQuery::executeToTable( } } + table.reset(); + ddl_guard = {}; + if (query.no_delay) + { + if (query.kind == ASTDropQuery::Kind::Drop) + DatabaseCatalog::instance().waitTableFinallyDropped(table_id.uuid); + else if (query.kind == ASTDropQuery::Kind::Detach) + { + if (auto * atomic = typeid_cast(database.get())) + atomic->waitDetachedTableNotInUse(table_id.uuid); + } + } + return {}; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 823808759a2..556070d0360 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -93,7 +93,6 @@ namespace ErrorCodes extern const int PARAMETER_OUT_OF_BOUND; extern const int INVALID_LIMIT_EXPRESSION; extern const int INVALID_WITH_FILL_EXPRESSION; - extern const int INVALID_SETTING_VALUE; } 
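// With the V1 multiple-JOIN rewriter deleted from JoinToSubqueryTransformVisitor (see below),
// the multiple_joins_rewriter_version setting is no longer consulted here: rewriteMultipleJoins()
// drops its Settings parameter, JoinToSubqueryTransformVisitor::Data is constructed without a
// version, and INVALID_SETTING_VALUE becomes unused in this file.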
/// Assumes `storage` is set and the table filter (row-level security) is not empty. @@ -190,7 +189,7 @@ static Context getSubqueryContext(const Context & context) return subquery_context; } -static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings) +static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database) { ASTSelectQuery & select = query->as(); @@ -202,11 +201,7 @@ static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & table CrossToInnerJoinVisitor::Data cross_to_inner{tables, aliases, database}; CrossToInnerJoinVisitor(cross_to_inner).visit(query); - size_t rewriter_version = settings.multiple_joins_rewriter_version; - if (!rewriter_version || rewriter_version > 2) - throw Exception("Bad multiple_joins_rewriter_version setting value: " + settings.multiple_joins_rewriter_version.toString(), - ErrorCodes::INVALID_SETTING_VALUE); - JoinToSubqueryTransformVisitor::Data join_to_subs_data{tables, aliases, rewriter_version}; + JoinToSubqueryTransformVisitor::Data join_to_subs_data{tables, aliases}; JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query); } @@ -271,7 +266,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Rewrite JOINs if (!has_input && joined_tables.tablesCount() > 1) { - rewriteMultipleJoins(query_ptr, joined_tables.tablesWithColumns(), context->getCurrentDatabase(), settings); + rewriteMultipleJoins(query_ptr, joined_tables.tablesWithColumns(), context->getCurrentDatabase()); joined_tables.reset(getSelectQuery()); joined_tables.resolveTables(); diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 5f38f410e04..cdd7ec3ebf9 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -11,6 +10,8 @@ #include #include #include +#include +#include #include #include #include @@ -127,169 +128,6 @@ private: } }; -/// Find columns with aliases to push them into rewritten subselects. -/// Normalize table aliases: table_name.column_name -> table_alias.column_name -/// Make aliases maps (alias -> column_name, column_name -> alias) -struct ColumnAliasesMatcher -{ - using Visitor = ConstInDepthNodeVisitor; - - struct Data - { - const std::vector tables; - bool public_names; - AsteriskSemantic::RevertedAliases rev_aliases; /// long_name -> aliases - std::unordered_map aliases; /// alias -> long_name - std::vector> compound_identifiers; - std::set allowed_long_names; /// original names allowed as aliases '--t.x as t.x' (select expressions only). 
- bool inside_function = false; - - explicit Data(const std::vector && tables_) - : tables(tables_) - , public_names(false) - {} - - void replaceIdentifiersWithAliases() - { - String hide_prefix = "--"; /// @note restriction: user should not use aliases like `--table.column` - - for (auto & [identifier, is_public] : compound_identifiers) - { - String long_name = identifier->name; - - auto it = rev_aliases.find(long_name); - if (it == rev_aliases.end()) - { - bool last_table = false; - { - if (auto best_table_pos = IdentifierSemantic::chooseTable(*identifier, tables)) - last_table = (*best_table_pos + 1 == tables.size()); - } - - if (!last_table) - { - String alias = hide_prefix + long_name; - aliases[alias] = long_name; - rev_aliases[long_name].push_back(alias); - - IdentifierSemantic::coverName(*identifier, alias); - if (is_public) - { - identifier->setAlias(long_name); - allowed_long_names.insert(long_name); - } - } - else if (is_public) - identifier->setAlias(long_name); /// prevent crop long to short name - } - else - { - if (it->second.empty()) - throw Exception("No alias for '" + long_name + "'", ErrorCodes::LOGICAL_ERROR); - - if (is_public && allowed_long_names.count(long_name)) - ; /// leave original name unchanged for correct output - else - IdentifierSemantic::coverName(*identifier, it->second[0]); - } - } - } - }; - - static bool needChildVisit(const ASTPtr & node, const ASTPtr &) - { - /// Do not go into subqueries. Function visits children itself. - if (node->as() || - node->as()) - return false; - return !node->as(); - } - - static void visit(const ASTPtr & ast, Data & data) - { - if (auto * t = ast->as()) - visit(*t, ast, data); - else if (auto * f = ast->as()) - visit(*f, ast, data); - - /// Do not allow asterisks but ignore them inside functions. I.e. allow 'count(*)'. - if (!data.inside_function && (ast->as() || ast->as())) - throw Exception("Multiple JOIN do not support asterisks for complex queries yet", ErrorCodes::NOT_IMPLEMENTED); - } - - static void visit(const ASTFunction &, const ASTPtr & ast, Data & data) - { - /// Grandchild case: Function -> (ExpressionList) -> Asterisk - data.inside_function = true; - Visitor visitor(data); - for (auto & child : ast->children) - visitor.visit(child); - data.inside_function = false; - } - - static void visit(const ASTIdentifier & const_node, const ASTPtr &, Data & data) - { - ASTIdentifier & node = const_cast(const_node); /// we know it's not const - if (node.isShort()) - return; - - bool last_table = false; - String long_name; - - if (auto table_pos = IdentifierSemantic::chooseTable(node, data.tables)) - { - const auto & table = data.tables[*table_pos]; - IdentifierSemantic::setColumnLongName(node, table); /// table_name.column_name -> table_alias.column_name - long_name = node.name; - if (&table == &data.tables.back()) - last_table = true; - } - - if (long_name.empty()) - throw Exception("Cannot refer column '" + node.name + "' to table", ErrorCodes::AMBIGUOUS_COLUMN_NAME); - - String alias = node.tryGetAlias(); - if (!alias.empty()) - { - data.aliases[alias] = long_name; - data.rev_aliases[long_name].push_back(alias); - - if (!last_table) - { - IdentifierSemantic::coverName(node, alias); - node.setAlias({}); - } - } - else if (node.compound()) - data.compound_identifiers.emplace_back(&node, data.public_names); - } -}; - -/// Attach additional semantic info to generated selects. 
-struct AppendSemanticVisitorData -{ - using TypeToVisit = ASTSelectQuery; - - AsteriskSemantic::RevertedAliasesPtr rev_aliases = {}; - bool done = false; - - void visit(ASTSelectQuery & select, ASTPtr &) - { - if (done || !rev_aliases || !select.select()) - return; - - for (auto & child : select.select()->children) - { - if (auto * node = child->as()) - AsteriskSemantic::setAliases(*node, rev_aliases); - if (auto * node = child->as()) - AsteriskSemantic::setAliases(*node, rev_aliases); - } - - done = true; - } -}; - /// Replaces table elements with pair. struct RewriteTablesVisitorData { @@ -371,9 +209,6 @@ bool needRewrite(ASTSelectQuery & select, std::vector; using RewriteVisitor = InDepthNodeVisitor; using ExtractAsterisksVisitor = ConstInDepthNodeVisitor; -using ColumnAliasesVisitor = ColumnAliasesMatcher::Visitor; -using AppendSemanticMatcher = OneTypeMatcher; -using AppendSemanticVisitor = InDepthNodeVisitor; /// V2 specific visitors @@ -718,12 +553,7 @@ bool JoinToSubqueryTransformMatcher::needChildVisit(ASTPtr & node, const ASTPtr void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data) { if (auto * t = ast->as()) - { - if (data.version == 1) - visitV1(*t, ast, data); - else - visitV2(*t, ast, data); - } + visit(*t, ast, data); } /// The reason for V2: not to alias columns without clashes. @@ -733,7 +563,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data) /// 3. Rewrite multiple JOINs with subqueries: /// SELECT ... FROM (SELECT `--.s`.*, ... FROM (...) AS `--.s` JOIN tableY ON ...) AS `--.s` JOIN tableZ ON ...' /// 4. Push down expressions of aliases used in ON section into expression list of first reletad subquery -void JoinToSubqueryTransformMatcher::visitV2(ASTSelectQuery & select, ASTPtr & ast, Data & data) +void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data) { std::vector table_expressions; if (!needRewrite<2>(select, table_expressions)) @@ -855,89 +685,6 @@ void JoinToSubqueryTransformMatcher::visitV2(ASTSelectQuery & select, ASTPtr & a data.done = true; } -void JoinToSubqueryTransformMatcher::visitV1(ASTSelectQuery & select, ASTPtr &, Data & data) -{ - using RevertedAliases = AsteriskSemantic::RevertedAliases; - - std::vector table_expressions; - if (!needRewrite(select, table_expressions)) - return; - - if (table_expressions.size() != data.tables.size()) - throw Exception("Inconsistent tables count in JOIN rewriter", ErrorCodes::LOGICAL_ERROR); - - bool has_subquery = false; - for (const auto & expr : table_expressions) - if (expr->subquery) - has_subquery = true; - - if (!has_subquery) - { - ExtractAsterisksVisitor::Data asterisks_data(data.tables); - ExtractAsterisksVisitor(asterisks_data).visit(select.select()); - if (asterisks_data.new_select_expression_list) - select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(asterisks_data.new_select_expression_list)); - } - - ColumnAliasesVisitor::Data aliases_data(getDatabaseAndTables(select, "")); - if (select.select()) - { - /// TODO: there's a bug here. We need to publish only top-level ASTIdentifiers but visitor extracts all. 
- aliases_data.public_names = true; - ColumnAliasesVisitor(aliases_data).visit(select.select()); - aliases_data.public_names = false; - } - if (select.where()) - ColumnAliasesVisitor(aliases_data).visit(select.where()); - if (select.prewhere()) - ColumnAliasesVisitor(aliases_data).visit(select.prewhere()); - if (select.orderBy()) - ColumnAliasesVisitor(aliases_data).visit(select.orderBy()); - if (select.groupBy()) - ColumnAliasesVisitor(aliases_data).visit(select.groupBy()); - if (select.having()) - ColumnAliasesVisitor(aliases_data).visit(select.having()); - - /// JOIN sections - for (auto & child : select.tables()->children) - { - auto * table = child->as(); - if (table->table_join) - { - auto & join = table->table_join->as(); - if (join.on_expression) - ColumnAliasesVisitor(aliases_data).visit(join.on_expression); - } - } - - aliases_data.replaceIdentifiersWithAliases(); - - auto rev_aliases = std::make_shared(); - rev_aliases->swap(aliases_data.rev_aliases); - - auto & src_tables = select.tables()->children; - ASTPtr left_table = src_tables[0]; - - static ASTPtr subquery_template = makeSubqueryTemplate(); - - for (size_t i = 1; i < src_tables.size() - 1; ++i) - { - left_table = replaceJoin(left_table, src_tables[i], subquery_template->clone()); - if (!left_table) - throw Exception("Cannot replace tables with subselect", ErrorCodes::LOGICAL_ERROR); - - /// attach data to generated asterisk - AppendSemanticVisitor::Data semantic_data{rev_aliases, false}; - AppendSemanticVisitor(semantic_data).visit(left_table); - } - - /// replace tables in select with generated two-table join - RewriteVisitor::Data visitor_data{left_table, src_tables.back()}; - RewriteVisitor(visitor_data).visit(select.refTables()); - - data.done = true; -} - ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTPtr ast_left, ASTPtr ast_right, ASTPtr subquery_template) { const auto * left = ast_left->as(); diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.h b/src/Interpreters/JoinToSubqueryTransformVisitor.h index 643eb19b365..a024a168509 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.h +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.h @@ -20,7 +20,6 @@ public: { const std::vector & tables; const Aliases & aliases; - size_t version = 1; bool done = false; }; @@ -43,10 +42,7 @@ private: /// TablesInSelectQueryElement [source1] /// TablesInSelectQueryElement [source2] /// - static void visitV1(ASTSelectQuery & select, ASTPtr & ast, Data & data); - - /// V2 uses information about tables' columns to rewrite queries. - static void visitV2(ASTSelectQuery & select, ASTPtr & ast, Data & data); + static void visit(ASTSelectQuery & select, ASTPtr & ast, Data & data); /// @return combined TablesInSelectQueryElement or nullptr if cannot rewrite static ASTPtr replaceJoin(ASTPtr left, ASTPtr right, ASTPtr subquery_template); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 30da0d6e65f..3e7ebfec139 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -619,19 +619,20 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & for (const auto & kv : stage.column_to_updated) { - actions_chain.getLastActions()->add(ExpressionAction::copyColumn( - kv.second->getColumnName(), kv.first, /* can_replace = */ true)); + actions_chain.getLastStep().actions()->addAlias( + kv.second->getColumnName(), kv.first, /* can_replace = */ true); } } /// Remove all intermediate columns. 
actions_chain.addStep(); actions_chain.getLastStep().required_output.assign(stage.output_columns.begin(), stage.output_columns.end()); + actions_chain.getLastActions(); actions_chain.finalize(); /// Propagate information about columns needed as input. - for (const auto & column : actions_chain.steps.front()->actions()->getRequiredColumnsWithTypes()) + for (const auto & column : actions_chain.steps.front()->getRequiredColumns()) prepared_stages[i - 1].output_columns.insert(column.name); } @@ -675,12 +676,12 @@ QueryPipelinePtr MutationsInterpreter::addStreamsForLaterStages(const std::vecto if (i < stage.filter_column_names.size()) { /// Execute DELETEs. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->actions(), stage.filter_column_names[i], false)); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->getExpression(), stage.filter_column_names[i], false)); } else { /// Execute UPDATE or final projection. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->actions())); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->getExpression())); } } diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 018ddbcfa1d..2203e91e913 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -431,7 +431,7 @@ QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_even } if (get_settings && query_context) - res.query_settings = std::make_shared(query_context->getSettingsRef()); + res.query_settings = std::make_shared(query_context->getSettings()); return res; } diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 75e0fae615a..c2273a1db2c 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -39,6 +39,7 @@ Block QueryLogElement::createBlock() {std::move(query_status_datatype), "type"}, {std::make_shared(), "event_date"}, {std::make_shared(), "event_time"}, + {std::make_shared(6), "event_time_microseconds"}, {std::make_shared(), "query_start_time"}, {std::make_shared(6), "query_start_time_microseconds"}, {std::make_shared(), "query_duration_ms"}, @@ -97,6 +98,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(type); columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); columns[i++]->insert(event_time); + columns[i++]->insert(event_time_microseconds); columns[i++]->insert(query_start_time); columns[i++]->insert(query_start_time_microseconds); columns[i++]->insert(query_duration_ms); diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index d1297feb3fb..9d42b787160 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -30,6 +30,7 @@ struct QueryLogElement /// Depending on the type of query and type of stage, not all the fields may be filled. 
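    /// event_time and the new event_time_microseconds are filled from the same time point
    /// (see executeQuery.cpp and ThreadStatusExt.cpp below), e.g.:
    ///
    ///     const auto now = std::chrono::system_clock::now();
    ///     elem.event_time = time_in_seconds(now);
    ///     elem.event_time_microseconds = time_in_microseconds(now);
    ///
    /// so the two values always agree up to a precision of one second.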
time_t event_time{}; + UInt64 event_time_microseconds{}; time_t query_start_time{}; UInt64 query_start_time_microseconds{}; UInt64 query_duration_ms{}; diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index e5a8cf7c5cf..2ecb03d622a 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -23,6 +23,7 @@ Block QueryThreadLogElement::createBlock() return { {std::make_shared(), "event_date"}, {std::make_shared(), "event_time"}, + {std::make_shared(6), "event_time_microseconds"}, {std::make_shared(), "query_start_time"}, {std::make_shared(6), "query_start_time_microseconds"}, {std::make_shared(), "query_duration_ms"}, @@ -73,6 +74,7 @@ void QueryThreadLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); columns[i++]->insert(event_time); + columns[i++]->insert(event_time_microseconds); columns[i++]->insert(query_start_time); columns[i++]->insert(query_start_time_microseconds); columns[i++]->insert(query_duration_ms); diff --git a/src/Interpreters/QueryThreadLog.h b/src/Interpreters/QueryThreadLog.h index 66a480bfa0d..715902b29ad 100644 --- a/src/Interpreters/QueryThreadLog.h +++ b/src/Interpreters/QueryThreadLog.h @@ -16,6 +16,7 @@ namespace DB struct QueryThreadLogElement { time_t event_time{}; + UInt64 event_time_microseconds{}; /// When query was attached to current thread time_t query_start_time{}; /// same as above but adds microsecond precision diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 03b1b735cbc..2a0ce9cef53 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -438,7 +438,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, ASTPtr query_ptr(insert.release()); // we need query context to do inserts to target table with MV containing subqueries or joins - auto insert_context = Context(context); + Context insert_context(context); insert_context.makeQueryContext(); InterpreterInsertQuery interpreter(query_ptr, insert_context); diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp index 243bf6d299a..d1cc6a052e8 100644 --- a/src/Interpreters/TextLog.cpp +++ b/src/Interpreters/TextLog.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ Block TextLogElement::createBlock() { {std::make_shared(), "event_date"}, {std::make_shared(), "event_time"}, + {std::make_shared(6), "event_time_microseconds"}, {std::make_shared(), "microseconds"}, {std::make_shared(std::make_shared()), "thread_name"}, @@ -52,6 +54,7 @@ void TextLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); columns[i++]->insert(event_time); + columns[i++]->insert(event_time_microseconds); columns[i++]->insert(microseconds); columns[i++]->insertData(thread_name.data(), thread_name.size()); diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h index d5d1610dfb5..814b3c73044 100644 --- a/src/Interpreters/TextLog.h +++ b/src/Interpreters/TextLog.h @@ -9,6 +9,7 @@ using Poco::Message; struct TextLogElement { time_t event_time{}; + UInt64 event_time_microseconds{}; UInt32 microseconds; String thread_name; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 0f610d9f6d2..61245782ba9 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -322,7 +322,14 @@ void 
ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log) { QueryThreadLogElement elem; - elem.event_time = time(nullptr); + // construct current_time and current_time_microseconds using the same time point + // so that the two times will always be equal up to a precision of a second. + const auto now = std::chrono::system_clock::now(); + auto current_time = time_in_seconds(now); + auto current_time_microseconds = time_in_microseconds(now); + + elem.event_time = current_time; + elem.event_time_microseconds = current_time_microseconds; elem.query_start_time = query_start_time; elem.query_start_time_microseconds = query_start_time_microseconds; elem.query_duration_ms = (getCurrentTimeNanoseconds() - query_start_time_nanoseconds) / 1000000U; diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index f7e82032f49..40bcc0db445 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -26,6 +27,7 @@ Block TraceLogElement::createBlock() { {std::make_shared(), "event_date"}, {std::make_shared(), "event_time"}, + {std::make_shared(6), "event_time_microseconds"}, {std::make_shared(), "timestamp_ns"}, {std::make_shared(), "revision"}, {std::make_shared(trace_values), "trace_type"}, @@ -42,6 +44,7 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); columns[i++]->insert(event_time); + columns[i++]->insert(event_time_microseconds); columns[i++]->insert(timestamp_ns); columns[i++]->insert(ClickHouseRevision::getVersionRevision()); columns[i++]->insert(static_cast(trace_type)); diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index 06f7f27299d..d694a6201f7 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -18,6 +18,7 @@ struct TraceLogElement static const TraceDataType::Values trace_values; time_t event_time{}; + UInt64 event_time_microseconds{}; UInt64 timestamp_ns{}; TraceType trace_type{}; UInt64 thread_id{}; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 74622c72865..9e0b6fdd196 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include @@ -174,25 +173,11 @@ void TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr Visitor(data).visit(select.refHaving()); } -static void addIdentifier(ASTs & nodes, const DatabaseAndTableWithAlias & table, const String & column_name, - AsteriskSemantic::RevertedAliasesPtr aliases) +static void addIdentifier(ASTs & nodes, const DatabaseAndTableWithAlias & table, const String & column_name) { String table_name = table.getQualifiedNamePrefix(false); auto identifier = std::make_shared(std::vector{table_name, column_name}); - - bool added = false; - if (aliases && aliases->count(identifier->name)) - { - for (const String & alias : (*aliases)[identifier->name]) - { - nodes.push_back(identifier->clone()); - nodes.back()->setAlias(alias); - added = true; - } - } - - if (!added) - nodes.emplace_back(identifier); + nodes.emplace_back(identifier); } /// Replace *, alias.*, database.table.* with a list of columns. 
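/// With AsteriskSemantic gone, addIdentifier() above no longer receives a reverted-aliases map:
/// asterisk expansion simply emits one qualified identifier per column, and the hunks below drop
/// the aliases argument from every call site.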
@@ -237,7 +222,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { if (first_table || !data.join_using_columns.count(column.name)) { - addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*asterisk)); + addIdentifier(node.children, table.table, column.name); } } @@ -264,7 +249,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { if (asterisk_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.count(column.name))) { - addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*asterisk_pattern)); + addIdentifier(node.children, table.table, column.name); } } @@ -287,7 +272,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { for (const auto & column : table.columns) { - addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*qualified_asterisk)); + addIdentifier(node.children, table.table, column.name); } break; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 59896065a87..c82748eadc0 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -206,10 +206,11 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c elem.type = QueryLogElementType::EXCEPTION_BEFORE_START; - // all callers to onExceptionBeforeStart upstream construct the timespec for event_time and - // event_time_microseconds from the same timespec. So it can be assumed that both of these + // all callers to onExceptionBeforeStart method construct the timespec for event_time and + // event_time_microseconds from the same time point. So, it can be assumed that both of these // times are equal upto the precision of a second. elem.event_time = current_time; + elem.event_time_microseconds = current_time_microseconds; elem.query_start_time = current_time; elem.query_start_time_microseconds = current_time_microseconds; @@ -319,7 +320,9 @@ static std::tuple executeQueryImpl( logQuery(query_for_logging, context, internal); if (!internal) + { onExceptionBeforeStart(query_for_logging, context, current_time, current_time_microseconds, ast); + } throw; } @@ -484,6 +487,7 @@ static std::tuple executeQueryImpl( elem.type = QueryLogElementType::QUERY_START; elem.event_time = current_time; + elem.event_time_microseconds = current_time_microseconds; elem.query_start_time = current_time; elem.query_start_time_microseconds = current_time_microseconds; @@ -554,8 +558,11 @@ static std::tuple executeQueryImpl( elem.type = QueryLogElementType::QUERY_FINISH; - elem.event_time = time(nullptr); - + // construct event_time and event_time_microseconds using the same time point + // so that the two times will always be equal up to a precision of a second. + const auto time_now = std::chrono::system_clock::now(); + elem.event_time = time_in_seconds(time_now); + elem.event_time_microseconds = time_in_microseconds(time_now); status_info_to_query_log(elem, info, ast); auto progress_callback = context.getProgressCallback(); @@ -615,7 +622,12 @@ static std::tuple executeQueryImpl( elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; - elem.event_time = time(nullptr); + // event_time and event_time_microseconds are being constructed from the same time point + // to ensure that both the times will be equal upto the precision of a second. 
+ const auto time_now = std::chrono::system_clock::now(); + + elem.event_time = time_in_seconds(time_now); + elem.event_time_microseconds = time_in_microseconds(time_now); elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); elem.exception_code = getCurrentExceptionCode(); elem.exception = getCurrentExceptionMessage(false); diff --git a/src/Parsers/ASTAsterisk.h b/src/Parsers/ASTAsterisk.h index 9c4c9a2df6d..027758ba48c 100644 --- a/src/Parsers/ASTAsterisk.h +++ b/src/Parsers/ASTAsterisk.h @@ -6,9 +6,6 @@ namespace DB { -struct AsteriskSemantic; -struct AsteriskSemanticImpl; - /** SELECT * is expanded to all visible columns of the source table. * Optional transformers can be attached to further manipulate these expanded columns. */ @@ -21,11 +18,6 @@ public: protected: void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; - -private: - std::shared_ptr semantic; /// pimpl - - friend struct AsteriskSemantic; }; } diff --git a/src/Parsers/ASTColumnsTransformers.h b/src/Parsers/ASTColumnsTransformers.h index ddf0d70dc35..4b7a933647e 100644 --- a/src/Parsers/ASTColumnsTransformers.h +++ b/src/Parsers/ASTColumnsTransformers.h @@ -53,7 +53,7 @@ public: ASTPtr clone() const override { auto replacement = std::make_shared(*this); - replacement->name = name; + replacement->children.clear(); replacement->expr = expr->clone(); replacement->children.push_back(replacement->expr); return replacement; diff --git a/src/Parsers/ASTQualifiedAsterisk.h b/src/Parsers/ASTQualifiedAsterisk.h index 2c3689d0ace..1b644532f53 100644 --- a/src/Parsers/ASTQualifiedAsterisk.h +++ b/src/Parsers/ASTQualifiedAsterisk.h @@ -6,9 +6,6 @@ namespace DB { -struct AsteriskSemantic; -struct AsteriskSemanticImpl; - /** Something like t.* * It will have qualifier as its child ASTIdentifier. * Optional transformers can be attached to further manipulate these expanded columns. 
@@ -27,11 +24,6 @@ public: protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - -private: - std::shared_ptr semantic; /// pimpl - - friend struct AsteriskSemantic; }; } diff --git a/src/Parsers/ParserDropQuery.cpp b/src/Parsers/ParserDropQuery.cpp index 28ab3bdac32..31a6250a006 100644 --- a/src/Parsers/ParserDropQuery.cpp +++ b/src/Parsers/ParserDropQuery.cpp @@ -22,6 +22,7 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, bool ParserKeyword s_if_exists("IF EXISTS"); ParserIdentifier name_p; ParserKeyword s_no_delay("NO DELAY"); + ParserKeyword s_sync("SYNC"); ASTPtr database; ASTPtr table; @@ -79,7 +80,7 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, bool return false; } - if (s_no_delay.ignore(pos, expected)) + if (s_no_delay.ignore(pos, expected) || s_sync.ignore(pos, expected)) no_delay = true; } diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp new file mode 100644 index 00000000000..32382b70bd7 --- /dev/null +++ b/src/Parsers/obfuscateQueries.cpp @@ -0,0 +1,937 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TOO_MANY_TEMPORARY_COLUMNS; +} + + +namespace +{ + +const std::unordered_set keywords +{ + "CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT", + "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP", + "RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", + "PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO", + "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", + "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES", + "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", + "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", + "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", + "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", + "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", + "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", + "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY" +}; + +const std::unordered_set keep_words +{ + "id", "name", "value", "num", + "Id", "Name", "Value", "Num", + "ID", "NAME", "VALUE", "NUM", +}; + +/// The list of nouns collected from here: http://www.desiquintans.com/nounlist, Public domain. 
+std::initializer_list nouns +{ +"aardvark", "abacus", "abbey", "abbreviation", "abdomen", "ability", "abnormality", "abolishment", "abortion", +"abrogation", "absence", "abundance", "abuse", "academics", "academy", "accelerant", "accelerator", "accent", "acceptance", "access", +"accessory", "accident", "accommodation", "accompanist", "accomplishment", "accord", "accordance", "accordion", "account", "accountability", +"accountant", "accounting", "accuracy", "accusation", "acetate", "achievement", "achiever", "acid", "acknowledgment", "acorn", "acoustics", +"acquaintance", "acquisition", "acre", "acrylic", "act", "action", "activation", "activist", "activity", "actor", "actress", "acupuncture", +"ad", "adaptation", "adapter", "addiction", "addition", "address", "adjective", "adjustment", "admin", "administration", "administrator", +"admire", "admission", "adobe", "adoption", "adrenalin", "adrenaline", "adult", "adulthood", "advance", "advancement", "advantage", "advent", +"adverb", "advertisement", "advertising", "advice", "adviser", "advocacy", "advocate", "affair", "affect", "affidavit", "affiliate", +"affinity", "afoul", "afterlife", "aftermath", "afternoon", "aftershave", "aftershock", "afterthought", "age", "agency", "agenda", "agent", +"aggradation", "aggression", "aglet", "agony", "agreement", "agriculture", "aid", "aide", "aim", "air", "airbag", "airbus", "aircraft", +"airfare", "airfield", "airforce", "airline", "airmail", "airman", "airplane", "airport", "airship", "airspace", "alarm", "alb", "albatross", +"album", "alcohol", "alcove", "alder", "ale", "alert", "alfalfa", "algebra", "algorithm", "alias", "alibi", "alien", "allegation", "allergist", +"alley", "alliance", "alligator", "allocation", "allowance", "alloy", "alluvium", "almanac", "almighty", "almond", "alpaca", "alpenglow", +"alpenhorn", "alpha", "alphabet", "altar", "alteration", "alternative", "altitude", "alto", "aluminium", "aluminum", "amazement", "amazon", +"ambassador", "amber", "ambience", "ambiguity", "ambition", "ambulance", "amendment", "amenity", "ammunition", "amnesty", "amount", "amusement", +"anagram", "analgesia", "analog", "analogue", "analogy", "analysis", "analyst", "analytics", "anarchist", "anarchy", "anatomy", "ancestor", +"anchovy", "android", "anesthesiologist", "anesthesiology", "angel", "anger", "angina", "angiosperm", "angle", "angora", "angstrom", +"anguish", "animal", "anime", "anise", "ankle", "anklet", "anniversary", "announcement", "annual", "anorak", "answer", "ant", "anteater", +"antecedent", "antechamber", "antelope", "antennae", "anterior", "anthropology", "antibody", "anticipation", "anticodon", "antigen", +"antique", "antiquity", "antler", "antling", "anxiety", "anybody", "anyone", "anything", "anywhere", "apartment", "ape", "aperitif", +"apology", "app", "apparatus", "apparel", "appeal", "appearance", "appellation", "appendix", "appetiser", "appetite", "appetizer", "applause", +"apple", "applewood", "appliance", "application", "appointment", "appreciation", "apprehension", "approach", "appropriation", "approval", +"apricot", "apron", "apse", "aquarium", "aquifer", "arcade", "arch", "archaeologist", "archaeology", "archeology", "archer", +"architect", "architecture", "archives", "area", "arena", "argument", "arithmetic", "ark", "arm", "armadillo", "armament", +"armchair", "armoire", "armor", "armour", "armpit", "armrest", "army", "arrangement", "array", "arrest", "arrival", "arrogance", "arrow", +"art", "artery", "arthur", "artichoke", "article", "artifact", "artificer", "artist", 
"ascend", "ascent", "ascot", "ash", "ashram", "ashtray", +"aside", "asparagus", "aspect", "asphalt", "aspic", "assassination", "assault", "assembly", "assertion", "assessment", "asset", +"assignment", "assist", "assistance", "assistant", "associate", "association", "assumption", "assurance", "asterisk", "astrakhan", "astrolabe", +"astrologer", "astrology", "astronomy", "asymmetry", "atelier", "atheist", "athlete", "athletics", "atmosphere", "atom", "atrium", "attachment", +"attack", "attacker", "attainment", "attempt", "attendance", "attendant", "attention", "attenuation", "attic", "attitude", "attorney", +"attraction", "attribute", "auction", "audience", "audit", "auditorium", "aunt", "authentication", "authenticity", "author", "authorisation", +"authority", "authorization", "auto", "autoimmunity", "automation", "automaton", "autumn", "availability", "avalanche", "avenue", "average", +"avocado", "award", "awareness", "awe", "axis", "azimuth", "babe", "baboon", "babushka", "baby", "bachelor", "back", "backbone", +"backburn", "backdrop", "background", "backpack", "backup", "backyard", "bacon", "bacterium", "badge", "badger", "bafflement", "bag", +"bagel", "baggage", "baggie", "baggy", "bagpipe", "bail", "bait", "bake", "baker", "bakery", "bakeware", "balaclava", "balalaika", "balance", +"balcony", "ball", "ballet", "balloon", "balloonist", "ballot", "ballpark", "bamboo", "ban", "banana", "band", "bandana", "bandanna", +"bandolier", "bandwidth", "bangle", "banjo", "bank", "bankbook", "banker", "banking", "bankruptcy", "banner", "banquette", "banyan", +"baobab", "bar", "barbecue", "barbeque", "barber", "barbiturate", "bargain", "barge", "baritone", "barium", "bark", "barley", "barn", +"barometer", "barracks", "barrage", "barrel", "barrier", "barstool", "bartender", "base", "baseball", "baseboard", "baseline", "basement", +"basics", "basil", "basin", "basis", "basket", "basketball", "bass", "bassinet", "bassoon", "bat", "bath", "bather", "bathhouse", "bathrobe", +"bathroom", "bathtub", "battalion", "batter", "battery", "batting", "battle", "battleship", "bay", "bayou", "beach", "bead", "beak", +"beam", "bean", "beancurd", "beanie", "beanstalk", "bear", "beard", "beast", "beastie", "beat", "beating", "beauty", "beaver", "beck", +"bed", "bedrock", "bedroom", "bee", "beech", "beef", "beer", "beet", "beetle", "beggar", "beginner", "beginning", "begonia", "behalf", +"behavior", "behaviour", "beheading", "behest", "behold", "being", "belfry", "belief", "believer", "bell", "belligerency", "bellows", +"belly", "belt", "bench", "bend", "beneficiary", "benefit", "beret", "berry", "bestseller", "bet", "beverage", "beyond", +"bias", "bibliography", "bicycle", "bid", "bidder", "bidding", "bidet", "bifocals", "bijou", "bike", "bikini", "bill", "billboard", "billing", +"billion", "bin", "binoculars", "biology", "biopsy", "biosphere", "biplane", "birch", "bird", "birdbath", "birdcage", +"birdhouse", "birth", "birthday", "biscuit", "bit", "bite", "bitten", "bitter", "black", "blackberry", "blackbird", "blackboard", "blackfish", +"blackness", "bladder", "blade", "blame", "blank", "blanket", "blast", "blazer", "blend", "blessing", "blight", "blind", "blinker", "blister", +"blizzard", "block", "blocker", "blog", "blogger", "blood", "bloodflow", "bloom", "bloomer", "blossom", "blouse", "blow", "blowgun", +"blowhole", "blue", "blueberry", "blush", "boar", "board", "boat", "boatload", "boatyard", "bob", "bobcat", "body", "bog", "bolero", +"bolt", "bomb", "bomber", "bombing", "bond", "bonding", "bondsman", "bone", 
"bonfire", "bongo", "bonnet", "bonsai", "bonus", "boogeyman", +"book", "bookcase", "bookend", "booking", "booklet", "bookmark", "boolean", "boom", "boon", "boost", "booster", "boot", "bootee", "bootie", +"booty", "border", "bore", "borrower", "borrowing", "bosom", "boss", "botany", "bother", "bottle", "bottling", "bottom", +"boudoir", "bough", "boulder", "boulevard", "boundary", "bouquet", "bourgeoisie", "bout", "boutique", "bow", "bower", "bowl", "bowler", +"bowling", "bowtie", "box", "boxer", "boxspring", "boy", "boycott", "boyfriend", "boyhood", "boysenberry", "bra", "brace", "bracelet", +"bracket", "brain", "brake", "bran", "branch", "brand", "brandy", "brass", "brassiere", "bratwurst", "bread", "breadcrumb", "breadfruit", +"break", "breakdown", "breakfast", "breakpoint", "breakthrough", "breast", "breastplate", "breath", "breeze", "brewer", "bribery", "brick", +"bricklaying", "bride", "bridge", "brief", "briefing", "briefly", "briefs", "brilliant", "brink", "brisket", "broad", "broadcast", "broccoli", +"brochure", "brocolli", "broiler", "broker", "bronchitis", "bronco", "bronze", "brooch", "brood", "brook", "broom", "brother", +"brow", "brown", "brownie", "browser", "browsing", "brunch", "brush", "brushfire", "brushing", "bubble", "buck", "bucket", "buckle", +"buckwheat", "bud", "buddy", "budget", "buffalo", "buffer", "buffet", "bug", "buggy", "bugle", "builder", "building", "bulb", "bulk", +"bull", "bulldozer", "bullet", "bump", "bumper", "bun", "bunch", "bungalow", "bunghole", "bunkhouse", "burden", "bureau", +"burglar", "burial", "burlesque", "burn", "burning", "burrito", "burro", "burrow", "burst", "bus", "bush", "business", "businessman", +"bust", "bustle", "butane", "butcher", "butler", "butter", "butterfly", "button", "buy", "buyer", "buying", "buzz", "buzzard", +"cabana", "cabbage", "cabin", "cabinet", "cable", "caboose", "cacao", "cactus", "caddy", "cadet", "cafe", "caffeine", "caftan", "cage", +"cake", "calcification", "calculation", "calculator", "calculus", "calendar", "calf", "caliber", "calibre", "calico", "call", "calm", +"calorie", "camel", "cameo", "camera", "camp", "campaign", "campaigning", "campanile", "camper", "campus", "can", "canal", "cancer", +"candelabra", "candidacy", "candidate", "candle", "candy", "cane", "cannibal", "cannon", "canoe", "canon", "canopy", "cantaloupe", "canteen", +"canvas", "cap", "capability", "capacity", "cape", "caper", "capital", "capitalism", "capitulation", "capon", "cappelletti", "cappuccino", +"captain", "caption", "captor", "car", "carabao", "caramel", "caravan", "carbohydrate", "carbon", "carboxyl", "card", "cardboard", "cardigan", +"care", "career", "cargo", "caribou", "carload", "carnation", "carnival", "carol", "carotene", "carp", "carpenter", "carpet", "carpeting", +"carport", "carriage", "carrier", "carrot", "carry", "cart", "cartel", "carter", "cartilage", "cartload", "cartoon", "cartridge", "carving", +"cascade", "case", "casement", "cash", "cashew", "cashier", "casino", "casket", "cassava", "casserole", "cassock", "cast", "castanet", +"castle", "casualty", "cat", "catacomb", "catalogue", "catalysis", "catalyst", "catamaran", "catastrophe", "catch", "catcher", "category", +"caterpillar", "cathedral", "cation", "catsup", "cattle", "cauliflower", "causal", "cause", "causeway", "caution", "cave", "caviar", +"cayenne", "ceiling", "celebration", "celebrity", "celeriac", "celery", "cell", "cellar", "cello", "celsius", "cement", "cemetery", "cenotaph", +"census", "cent", "center", "centimeter", "centre", "centurion", "century", 
"cephalopod", "ceramic", "ceramics", "cereal", "ceremony", +"certainty", "certificate", "certification", "cesspool", "chafe", "chain", "chainstay", "chair", "chairlift", "chairman", "chairperson", +"chaise", "chalet", "chalice", "chalk", "challenge", "chamber", "champagne", "champion", "championship", "chance", "chandelier", "change", +"channel", "chaos", "chap", "chapel", "chaplain", "chapter", "character", "characteristic", "characterization", "chard", "charge", "charger", +"charity", "charlatan", "charm", "charset", "chart", "charter", "chasm", "chassis", "chastity", "chasuble", "chateau", "chatter", "chauffeur", +"chauvinist", "check", "checkbook", "checking", "checkout", "checkroom", "cheddar", "cheek", "cheer", "cheese", "cheesecake", "cheetah", +"chef", "chem", "chemical", "chemistry", "chemotaxis", "cheque", "cherry", "chess", "chest", "chestnut", "chick", "chicken", "chicory", +"chief", "chiffonier", "child", "childbirth", "childhood", "chili", "chill", "chime", "chimpanzee", "chin", "chinchilla", "chino", "chip", +"chipmunk", "chivalry", "chive", "chives", "chocolate", "choice", "choir", "choker", "cholesterol", "choosing", "chop", +"chops", "chopstick", "chopsticks", "chord", "chorus", "chow", "chowder", "chrome", "chromolithograph", "chronicle", "chronograph", "chronometer", +"chrysalis", "chub", "chuck", "chug", "church", "churn", "chutney", "cicada", "cigarette", "cilantro", "cinder", "cinema", "cinnamon", +"circadian", "circle", "circuit", "circulation", "circumference", "circumstance", "cirrhosis", "cirrus", "citizen", "citizenship", "citron", +"citrus", "city", "civilian", "civilisation", "civilization", "claim", "clam", "clamp", "clan", "clank", "clapboard", "clarification", +"clarinet", "clarity", "clasp", "class", "classic", "classification", "classmate", "classroom", "clause", "clave", "clavicle", "clavier", +"claw", "clay", "cleaner", "clearance", "clearing", "cleat", "cleavage", "clef", "cleft", "clergyman", "cleric", "clerk", "click", "client", +"cliff", "climate", "climb", "clinic", "clip", "clipboard", "clipper", "cloak", "cloakroom", "clock", "clockwork", "clogs", "cloister", +"clone", "close", "closet", "closing", "closure", "cloth", "clothes", "clothing", "cloud", "cloudburst", "clove", "clover", "cloves", +"club", "clue", "cluster", "clutch", "coach", "coal", "coalition", "coast", "coaster", "coat", "cob", "cobbler", "cobweb", +"cock", "cockpit", "cockroach", "cocktail", "cocoa", "coconut", "cod", "code", "codepage", "codling", "codon", "codpiece", "coevolution", +"cofactor", "coffee", "coffin", "cohesion", "cohort", "coil", "coin", "coincidence", "coinsurance", "coke", "cold", "coleslaw", "coliseum", +"collaboration", "collagen", "collapse", "collar", "collard", "collateral", "colleague", "collection", "collectivisation", "collectivization", +"collector", "college", "collision", "colloquy", "colon", "colonial", "colonialism", "colonisation", "colonization", "colony", "color", +"colorlessness", "colt", "column", "columnist", "comb", "combat", "combination", "combine", "comeback", "comedy", "comestible", "comfort", +"comfortable", "comic", "comics", "comma", "command", "commander", "commandment", "comment", "commerce", "commercial", "commission", +"commitment", "committee", "commodity", "common", "commonsense", "commotion", "communicant", "communication", "communion", "communist", +"community", "commuter", "company", "comparison", "compass", "compassion", "compassionate", "compensation", "competence", "competition", +"competitor", "complaint", "complement", 
"completion", "complex", "complexity", "compliance", "complication", "complicity", "compliment", +"component", "comportment", "composer", "composite", "composition", "compost", "comprehension", "compress", "compromise", "comptroller", +"compulsion", "computer", "comradeship", "con", "concentrate", "concentration", "concept", "conception", "concern", "concert", "conclusion", +"concrete", "condition", "conditioner", "condominium", "condor", "conduct", "conductor", "cone", "confectionery", "conference", "confidence", +"confidentiality", "configuration", "confirmation", "conflict", "conformation", "confusion", "conga", "congo", "congregation", "congress", +"congressman", "congressperson", "conifer", "connection", "connotation", "conscience", "consciousness", "consensus", "consent", "consequence", +"conservation", "conservative", "consideration", "consignment", "consist", "consistency", "console", "consonant", "conspiracy", "conspirator", +"constant", "constellation", "constitution", "constraint", "construction", "consul", "consulate", "consulting", "consumer", "consumption", +"contact", "contact lens", "contagion", "container", "content", "contention", "contest", "context", "continent", "contingency", "continuity", +"contour", "contract", "contractor", "contrail", "contrary", "contrast", "contribution", "contributor", "control", "controller", "controversy", +"convection", "convenience", "convention", "conversation", "conversion", "convert", "convertible", "conviction", "cook", "cookbook", +"cookie", "cooking", "coonskin", "cooperation", "coordination", "coordinator", "cop", "cope", "copper", "copy", "copying", +"copyright", "copywriter", "coral", "cord", "corduroy", "core", "cork", "cormorant", "corn", "corner", "cornerstone", "cornet", "cornflakes", +"cornmeal", "corporal", "corporation", "corporatism", "corps", "corral", "correspondence", "correspondent", "corridor", "corruption", +"corsage", "cosset", "cost", "costume", "cot", "cottage", "cotton", "couch", "cougar", "cough", "council", "councilman", "councilor", +"councilperson", "counsel", "counseling", "counselling", "counsellor", "counselor", "count", "counter", "counterpart", +"counterterrorism", "countess", "country", "countryside", "county", "couple", "coupon", "courage", "course", "court", "courthouse", "courtroom", +"cousin", "covariate", "cover", "coverage", "coverall", "cow", "cowbell", "cowboy", "coyote", "crab", "crack", "cracker", "crackers", +"cradle", "craft", "craftsman", "cranberry", "crane", "cranky", "crash", "crate", "cravat", "craw", "crawdad", "crayfish", "crayon", +"crazy", "cream", "creation", "creationism", "creationist", "creative", "creativity", "creator", "creature", "creche", "credential", +"credenza", "credibility", "credit", "creditor", "creek", "creme brulee", "crepe", "crest", "crew", "crewman", "crewmate", "crewmember", +"crewmen", "cria", "crib", "cribbage", "cricket", "cricketer", "crime", "criminal", "crinoline", "crisis", "crisp", "criteria", "criterion", +"critic", "criticism", "crocodile", "crocus", "croissant", "crook", "crop", "cross", "crotch", +"croup", "crow", "crowd", "crown", "crucifixion", "crude", "cruelty", "cruise", "crumb", "crunch", "crusader", "crush", "crust", "cry", +"crystal", "crystallography", "cub", "cube", "cuckoo", "cucumber", "cue", "cuisine", "cultivar", "cultivator", "culture", +"culvert", "cummerbund", "cup", "cupboard", "cupcake", "cupola", "curd", "cure", "curio", "curiosity", "curl", "curler", "currant", "currency", +"current", "curriculum", "curry", "curse", "cursor", 
"curtailment", "curtain", "curve", "cushion", "custard", "custody", "custom", "customer", +"cut", "cuticle", "cutlet", "cutover", "cutting", "cyclamen", "cycle", "cyclone", "cyclooxygenase", "cygnet", "cylinder", "cymbal", "cynic", +"cyst", "cytokine", "cytoplasm", "dad", "daddy", "daffodil", "dagger", "dahlia", "daikon", "daily", "dairy", "daisy", "dam", "damage", +"dame", "dance", "dancer", "dancing", "dandelion", "danger", "dare", "dark", "darkness", "darn", "dart", "dash", "dashboard", +"data", "database", "date", "daughter", "dawn", "day", "daybed", "daylight", "dead", "deadline", "deal", "dealer", "dealing", "dearest", +"death", "deathwatch", "debate", "debris", "debt", "debtor", "decade", "decadence", "decency", "decimal", "decision", +"deck", "declaration", "declination", "decline", "decoder", "decongestant", "decoration", "decrease", "decryption", "dedication", "deduce", +"deduction", "deed", "deep", "deer", "default", "defeat", "defendant", "defender", "defense", "deficit", "definition", "deformation", +"degradation", "degree", "delay", "deliberation", "delight", "delivery", "demand", "democracy", "democrat", "demon", "demur", "den", +"denim", "denominator", "density", "dentist", "deodorant", "department", "departure", "dependency", "dependent", "deployment", "deposit", +"deposition", "depot", "depression", "depressive", "depth", "deputy", "derby", "derivation", "derivative", "derrick", "descendant", "descent", +"description", "desert", "design", "designation", "designer", "desire", "desk", "desktop", "dessert", "destination", "destiny", "destroyer", +"destruction", "detail", "detainee", "detainment", "detection", "detective", "detector", "detention", "determination", "detour", "devastation", +"developer", "developing", "development", "developmental", "deviance", "deviation", "device", "devil", "dew", "dhow", "diabetes", "diadem", +"diagnosis", "diagram", "dial", "dialect", "dialogue", "diam", "diamond", "diaper", "diaphragm", "diarist", "diary", "dibble", "dickey", "dictaphone", "dictator", "diction", "dictionary", "die", "diesel", "diet", "difference", "differential", "difficulty", "diffuse", +"dig", "digestion", "digestive", "digger", "digging", "digit", "dignity", "dilapidation", "dill", "dilution", "dime", "dimension", "dimple", +"diner", "dinghy", "dining", "dinner", "dinosaur", "dioxide", "dip", "diploma", "diplomacy", "dipstick", "direction", "directive", "director", +"directory", "dirndl", "dirt", "disability", "disadvantage", "disagreement", "disappointment", "disarmament", "disaster", "discharge", +"discipline", "disclaimer", "disclosure", "disco", "disconnection", "discount", "discourse", "discovery", "discrepancy", "discretion", +"discrimination", "discussion", "disdain", "disease", "disembodiment", "disengagement", "disguise", "disgust", "dish", "dishwasher", +"disk", "disparity", "dispatch", "displacement", "display", "disposal", "disposer", "disposition", "dispute", "disregard", "disruption", +"dissemination", "dissonance", "distance", "distinction", "distortion", "distribution", "distributor", "district", "divalent", "divan", +"diver", "diversity", "divide", "dividend", "divider", "divine", "diving", "division", "divorce", "doc", "dock", "doctor", "doctorate", +"doctrine", "document", "documentary", "documentation", "doe", "dog", "doggie", "dogsled", "dogwood", "doing", "doll", "dollar", "dollop", +"dolman", "dolor", "dolphin", "domain", "dome", "domination", "donation", "donkey", "donor", "donut", "door", "doorbell", "doorknob", +"doorpost", "doorway", "dory", 
"dose", "dot", "double", "doubling", "doubt", "doubter", "dough", "doughnut", "down", "downfall", "downforce", +"downgrade", "download", "downstairs", "downtown", "downturn", "dozen", "draft", "drag", "dragon", "dragonfly", "dragonfruit", "dragster", +"drain", "drainage", "drake", "drama", "dramaturge", "drapes", "draw", "drawbridge", "drawer", "drawing", "dream", "dreamer", "dredger", +"dress", "dresser", "dressing", "drill", "drink", "drinking", "drive", "driver", "driveway", "driving", "drizzle", "dromedary", "drop", +"drudgery", "drug", "drum", "drummer", "drunk", "dryer", "duck", "duckling", "dud", "dude", "due", "duel", "dueling", "duffel", "dugout", +"dulcimer", "dumbwaiter", "dump", "dump truck", "dune", "dune buggy", "dungarees", "dungeon", "duplexer", "duration", "durian", "dusk", +"dust", "dust storm", "duster", "duty", "dwarf", "dwell", "dwelling", "dynamics", "dynamite", "dynamo", "dynasty", "dysfunction", +"eagle", "eaglet", "ear", "eardrum", "earmuffs", "earnings", "earplug", "earring", "earrings", "earth", "earthquake", +"earthworm", "ease", "easel", "east", "eating", "eaves", "eavesdropper", "ecclesia", "echidna", "eclipse", "ecliptic", "ecology", "economics", +"economy", "ecosystem", "ectoderm", "ectodermal", "ecumenist", "eddy", "edge", "edger", "edible", "editing", "edition", "editor", "editorial", +"education", "eel", "effacement", "effect", "effective", "effectiveness", "effector", "efficacy", "efficiency", "effort", "egg", "egghead", +"eggnog", "eggplant", "ego", "eicosanoid", "ejector", "elbow", "elderberry", "election", "electricity", "electrocardiogram", "electronics", +"element", "elephant", "elevation", "elevator", "eleventh", "elf", "elicit", "eligibility", "elimination", "elite", "elixir", "elk", +"ellipse", "elm", "elongation", "elver", "email", "emanate", "embarrassment", "embassy", "embellishment", "embossing", "embryo", "emerald", +"emergence", "emergency", "emergent", "emery", "emission", "emitter", "emotion", "emphasis", "empire", "employ", "employee", "employer", +"employment", "empowerment", "emu", "enactment", "encirclement", "enclave", "enclosure", "encounter", "encouragement", "encyclopedia", +"end", "endive", "endoderm", "endorsement", "endothelium", "endpoint", "enemy", "energy", "enforcement", "engagement", "engine", "engineer", +"engineering", "enigma", "enjoyment", "enquiry", "enrollment", "enterprise", "entertainment", "enthusiasm", "entirety", "entity", "entrance", +"entree", "entrepreneur", "entry", "envelope", "environment", "envy", "enzyme", "epauliere", "epee", "ephemera", "ephemeris", "ephyra", +"epic", "episode", "epithelium", "epoch", "eponym", "epoxy", "equal", "equality", "equation", "equinox", "equipment", "equity", "equivalent", +"era", "eraser", "erection", "erosion", "error", "escalator", "escape", "escort", "espadrille", "espalier", "essay", "essence", "essential", +"establishment", "estate", "estimate", "estrogen", "estuary", "eternity", "ethernet", "ethics", "ethnicity", "ethyl", "euphonium", "eurocentrism", +"evaluation", "evaluator", "evaporation", "eve", "evening", "event", "everybody", "everyone", "everything", "eviction", +"evidence", "evil", "evocation", "evolution", "exaggeration", "exam", "examination", "examiner", "example", +"exasperation", "excellence", "exception", "excerpt", "excess", "exchange", "excitement", "exclamation", "excursion", "excuse", "execution", +"executive", "executor", "exercise", "exhaust", "exhaustion", "exhibit", "exhibition", "exile", "existence", "exit", "exocrine", "expansion", 
+"expansionism", "expectancy", "expectation", "expedition", "expense", "experience", "experiment", "experimentation", "expert", "expertise", +"explanation", "exploration", "explorer", "explosion", "export", "expose", "exposition", "exposure", "expression", "extension", "extent", +"exterior", "external", "extinction", "extreme", "extremist", "eye", "eyeball", "eyebrow", "eyebrows", "eyeglasses", "eyelash", "eyelashes", +"eyelid", "eyelids", "eyeliner", "eyestrain", "eyrie", "fabric", "face", "facelift", "facet", "facility", "facsimile", "fact", "factor", +"factory", "faculty", "fahrenheit", "fail", "failure", "fairness", "fairy", "faith", "faithful", "fall", "fallacy", "fame", +"familiar", "familiarity", "family", "fan", "fang", "fanlight", "fanny", "fantasy", "farm", "farmer", "farming", "farmland", +"farrow", "fascia", "fashion", "fat", "fate", "father", "fatigue", "fatigues", "faucet", "fault", "fav", "fava", "favor", +"favorite", "fawn", "fax", "fear", "feast", "feather", "feature", "fedelini", "federation", "fedora", "fee", "feed", "feedback", "feeding", +"feel", "feeling", "fellow", "felony", "female", "fen", "fence", "fencing", "fender", "feng", "fennel", "ferret", "ferry", "ferryboat", +"fertilizer", "festival", "fetus", "few", "fiber", "fiberglass", "fibre", "fibroblast", "fibrosis", "ficlet", "fiction", "fiddle", "field", +"fiery", "fiesta", "fifth", "fig", "fight", "fighter", "figure", "figurine", "file", "filing", "fill", "fillet", "filly", "film", "filter", +"filth", "final", "finance", "financing", "finding", "fine", "finer", "finger", "fingerling", "fingernail", "finish", "finisher", "fir", +"fire", "fireman", "fireplace", "firewall", "firm", "first", "fish", "fishbone", "fisherman", "fishery", "fishing", "fishmonger", "fishnet", +"fisting", "fit", "fitness", "fix", "fixture", "flag", "flair", "flame", "flan", "flanker", "flare", "flash", "flat", "flatboat", "flavor", +"flax", "fleck", "fledgling", "fleece", "flesh", "flexibility", "flick", "flicker", "flight", "flint", "flintlock", "flock", +"flood", "floodplain", "floor", "floozie", "flour", "flow", "flower", "flu", "flugelhorn", "fluke", "flume", "flung", "flute", "fly", +"flytrap", "foal", "foam", "fob", "focus", "fog", "fold", "folder", "folk", "folklore", "follower", "following", "fondue", "font", "food", +"foodstuffs", "fool", "foot", "footage", "football", "footnote", "footprint", "footrest", "footstep", "footstool", "footwear", "forage", +"forager", "foray", "force", "ford", "forearm", "forebear", "forecast", "forehead", "foreigner", "forelimb", "forest", "forestry", "forever", +"forgery", "fork", "form", "formal", "formamide", "format", "formation", "former", "formicarium", "formula", "fort", "forte", "fortnight", +"fortress", "fortune", "forum", "foundation", "founder", "founding", "fountain", "fourths", "fowl", "fox", "foxglove", "fraction", "fragrance", +"frame", "framework", "fratricide", "fraud", "fraudster", "freak", "freckle", "freedom", "freelance", "freezer", "freezing", "freight", +"freighter", "frenzy", "freon", "frequency", "fresco", "friction", "fridge", "friend", "friendship", "fries", "frigate", "fright", "fringe", +"fritter", "frock", "frog", "front", "frontier", "frost", "frosting", "frown", "fruit", "frustration", "fry", "fuel", "fugato", +"fulfillment", "full", "fun", "function", "functionality", "fund", "funding", "fundraising", "funeral", "fur", "furnace", "furniture", +"furry", "fusarium", "futon", "future", "gadget", "gaffe", "gaffer", "gain", "gaiters", "gale", "gallery", "galley", +"gallon", 
"galoshes", "gambling", "game", "gamebird", "gaming", "gander", "gang", "gap", "garage", "garb", "garbage", "garden", +"garlic", "garment", "garter", "gas", "gasket", "gasoline", "gasp", "gastronomy", "gastropod", "gate", "gateway", "gather", "gathering", +"gator", "gauge", "gauntlet", "gavel", "gazebo", "gazelle", "gear", "gearshift", "geek", "gel", "gelatin", "gelding", "gem", "gemsbok", +"gender", "gene", "general", "generation", "generator", "generosity", "genetics", "genie", "genius", "genocide", "genre", "gentleman", +"geography", "geology", "geometry", "geranium", "gerbil", "gesture", "geyser", "gherkin", "ghost", "giant", "gift", "gig", "gigantism", +"giggle", "ginger", "gingerbread", "ginseng", "giraffe", "girdle", "girl", "girlfriend", "git", "glacier", "gladiolus", "glance", "gland", +"glass", "glasses", "glee", "glen", "glider", "gliding", "glimpse", "globe", "glockenspiel", "gloom", "glory", "glove", "glow", "glucose", +"glue", "glut", "glutamate", "gnat", "gnu", "goal", "goat", "gobbler", "god", "goddess", "godfather", "godmother", "godparent", +"goggles", "going", "gold", "goldfish", "golf", "gondola", "gong", "good", "goodbye", "goodie", "goodness", "goodnight", +"goodwill", "goose", "gopher", "gorilla", "gosling", "gossip", "governance", "government", "governor", "gown", "grace", "grade", +"gradient", "graduate", "graduation", "graffiti", "graft", "grain", "gram", "grammar", "gran", "grand", "grandchild", "granddaughter", +"grandfather", "grandma", "grandmom", "grandmother", "grandpa", "grandparent", "grandson", "granny", "granola", "grant", "grape", "grapefruit", +"graph", "graphic", "grasp", "grass", "grasshopper", "grassland", "gratitude", "gravel", "gravitas", "gravity", "gravy", "gray", "grease", +"greatness", "greed", "green", "greenhouse", "greens", "grenade", "grey", "grid", "grief", +"grill", "grin", "grip", "gripper", "grit", "grocery", "ground", "group", "grouper", "grouse", "grove", "growth", "grub", "guacamole", +"guarantee", "guard", "guava", "guerrilla", "guess", "guest", "guestbook", "guidance", "guide", "guideline", "guilder", "guilt", "guilty", +"guinea", "guitar", "guitarist", "gum", "gumshoe", "gun", "gunpowder", "gutter", "guy", "gym", "gymnast", "gymnastics", "gynaecology", +"gyro", "habit", "habitat", "hacienda", "hacksaw", "hackwork", "hail", "hair", "haircut", "hake", "half", +"halibut", "hall", "halloween", "hallway", "halt", "ham", "hamburger", "hammer", "hammock", "hamster", "hand", "handball", +"handful", "handgun", "handicap", "handle", "handlebar", "handmaiden", "handover", "handrail", "handsaw", "hanger", "happening", "happiness", +"harald", "harbor", "harbour", "hardboard", "hardcover", "hardening", "hardhat", "hardship", "hardware", "hare", "harm", +"harmonica", "harmonise", "harmonize", "harmony", "harp", "harpooner", "harpsichord", "harvest", "harvester", "hash", "hashtag", "hassock", +"haste", "hat", "hatbox", "hatchet", "hatchling", "hate", "hatred", "haunt", "haven", "haversack", "havoc", "hawk", "hay", "haze", "hazel", +"hazelnut", "head", "headache", "headlight", "headline", "headphones", "headquarters", "headrest", "health", "hearing", +"hearsay", "heart", "heartache", "heartbeat", "hearth", "hearthside", "heartwood", "heat", "heater", "heating", "heaven", +"heavy", "hectare", "hedge", "hedgehog", "heel", "heifer", "height", "heir", "heirloom", "helicopter", "helium", "hell", "hellcat", "hello", +"helmet", "helo", "help", "hemisphere", "hemp", "hen", "hepatitis", "herb", "herbs", "heritage", "hermit", "hero", "heroine", "heron", 
+"herring", "hesitation", "hexagon", "heyday", "hiccups", "hide", "hierarchy", "high", "highland", "highlight", +"highway", "hike", "hiking", "hill", "hint", "hip", "hippodrome", "hippopotamus", "hire", "hiring", "historian", "history", "hit", "hive", +"hobbit", "hobby", "hockey", "hoe", "hog", "hold", "holder", "hole", "holiday", "home", "homeland", "homeownership", "hometown", "homework", +"homicide", "homogenate", "homonym", "honesty", "honey", "honeybee", "honeydew", "honor", "honoree", "hood", +"hoof", "hook", "hop", "hope", "hops", "horde", "horizon", "hormone", "horn", "hornet", "horror", "horse", "horseradish", "horst", "hose", +"hosiery", "hospice", "hospital", "hospitalisation", "hospitality", "hospitalization", "host", "hostel", "hostess", "hotdog", "hotel", +"hound", "hour", "hourglass", "house", "houseboat", "household", "housewife", "housework", "housing", "hovel", "hovercraft", "howard", +"howitzer", "hub", "hubcap", "hubris", "hug", "hugger", "hull", "human", "humanity", "humidity", "hummus", "humor", "humour", "hunchback", +"hundred", "hunger", "hunt", "hunter", "hunting", "hurdle", "hurdler", "hurricane", "hurry", "hurt", "husband", "hut", "hutch", "hyacinth", +"hybridisation", "hybridization", "hydrant", "hydraulics", "hydrocarb", "hydrocarbon", "hydrofoil", "hydrogen", "hydrolyse", "hydrolysis", +"hydrolyze", "hydroxyl", "hyena", "hygienic", "hype", "hyphenation", "hypochondria", "hypothermia", "hypothesis", "ice", +"iceberg", "icebreaker", "icecream", "icicle", "icing", "icon", "icy", "id", "idea", "ideal", "identification", "identity", "ideology", +"idiom", "idiot", "igloo", "ignorance", "ignorant", "ikebana", "illegal", "illiteracy", "illness", "illusion", "illustration", "image", +"imagination", "imbalance", "imitation", "immigrant", "immigration", "immortal", "impact", "impairment", "impala", "impediment", "implement", +"implementation", "implication", "import", "importance", "impostor", "impress", "impression", "imprisonment", "impropriety", "improvement", +"impudence", "impulse", "inability", "inauguration", "inbox", "incandescence", "incarnation", "incense", "incentive", +"inch", "incidence", "incident", "incision", "inclusion", "income", "incompetence", "inconvenience", "increase", "incubation", "independence", +"independent", "index", "indication", "indicator", "indigence", "individual", "industrialisation", "industrialization", "industry", "inequality", +"inevitable", "infancy", "infant", "infarction", "infection", "infiltration", "infinite", "infix", "inflammation", "inflation", "influence", +"influx", "info", "information", "infrastructure", "infusion", "inglenook", "ingrate", "ingredient", "inhabitant", "inheritance", "inhibition", +"inhibitor", "initial", "initialise", "initialize", "initiative", "injunction", "injury", "injustice", "ink", "inlay", "inn", "innervation", +"innocence", "innocent", "innovation", "input", "inquiry", "inscription", "insect", "insectarium", "insert", "inside", "insight", "insolence", +"insomnia", "inspection", "inspector", "inspiration", "installation", "instance", "instant", "instinct", "institute", "institution", +"instruction", "instructor", "instrument", "instrumentalist", "instrumentation", "insulation", "insurance", "insurgence", "insurrection", +"integer", "integral", "integration", "integrity", "intellect", "intelligence", "intensity", "intent", "intention", "intentionality", +"interaction", "interchange", "interconnection", "intercourse", "interest", "interface", "interferometer", "interior", "interject", "interloper", 
+"internet", "interpretation", "interpreter", "interval", "intervenor", "intervention", "interview", "interviewer", "intestine", "introduction", +"intuition", "invader", "invasion", "invention", "inventor", "inventory", "inverse", "inversion", "investigation", "investigator", "investment", +"investor", "invitation", "invite", "invoice", "involvement", "iridescence", "iris", "iron", "ironclad", "irony", "irrigation", "ischemia", +"island", "isogloss", "isolation", "issue", "item", "itinerary", "ivory", "jack", "jackal", "jacket", "jackfruit", "jade", "jaguar", +"jail", "jailhouse", "jalapeño", "jam", "jar", "jasmine", "jaw", "jazz", "jealousy", "jeans", "jeep", "jelly", "jellybeans", "jellyfish", +"jerk", "jet", "jewel", "jeweller", "jewellery", "jewelry", "jicama", "jiffy", "job", "jockey", "jodhpurs", "joey", "jogging", "joint", +"joke", "jot", "journal", "journalism", "journalist", "journey", "joy", "judge", "judgment", "judo", "jug", "juggernaut", "juice", "julienne", +"jumbo", "jump", "jumper", "jumpsuit", "jungle", "junior", "junk", "junker", "junket", "jury", "justice", "justification", "jute", "kale", +"kamikaze", "kangaroo", "karate", "kayak", "kazoo", "kebab", "keep", "keeper", "kendo", "kennel", "ketch", "ketchup", "kettle", "kettledrum", +"key", "keyboard", "keyboarding", "keystone", "kick", "kid", "kidney", "kielbasa", "kill", "killer", "killing", "kilogram", +"kilometer", "kilt", "kimono", "kinase", "kind", "kindness", "king", "kingdom", "kingfish", "kiosk", "kiss", "kit", "kitchen", "kite", +"kitsch", "kitten", "kitty", "kiwi", "knee", "kneejerk", "knickers", "knife", "knight", "knitting", "knock", "knot", +"knowledge", "knuckle", "koala", "kohlrabi", "kumquat", "lab", "label", "labor", "laboratory", "laborer", "labour", "labourer", "lace", +"lack", "lacquerware", "lad", "ladder", "ladle", "lady", "ladybug", "lag", "lake", "lamb", "lambkin", "lament", "lamp", "lanai", "land", +"landform", "landing", "landmine", "landscape", "lane", "language", "lantern", "lap", "laparoscope", "lapdog", "laptop", "larch", "lard", +"larder", "lark", "larva", "laryngitis", "lasagna", "lashes", "last", "latency", "latex", "lathe", "latitude", "latte", "latter", "laugh", +"laughter", "laundry", "lava", "law", "lawmaker", "lawn", "lawsuit", "lawyer", "lay", "layer", "layout", "lead", "leader", "leadership", +"leading", "leaf", "league", "leaker", "leap", "learning", "leash", "leather", "leave", "leaver", "lecture", "leek", "leeway", "left", +"leg", "legacy", "legal", "legend", "legging", "legislation", "legislator", "legislature", "legitimacy", "legume", "leisure", "lemon", +"lemonade", "lemur", "lender", "lending", "length", "lens", "lentil", "leopard", "leprosy", "leptocephalus", "lesson", "letter", +"lettuce", "level", "lever", "leverage", "leveret", "liability", "liar", "liberty", "libido", "library", "licence", "license", "licensing", +"licorice", "lid", "lie", "lieu", "lieutenant", "life", "lifestyle", "lifetime", "lift", "ligand", "light", "lighting", "lightning", +"lightscreen", "ligula", "likelihood", "likeness", "lilac", "lily", "limb", "lime", "limestone", "limit", "limitation", "limo", "line", +"linen", "liner", "linguist", "linguistics", "lining", "link", "linkage", "linseed", "lion", "lip", "lipid", "lipoprotein", "lipstick", +"liquid", "liquidity", "liquor", "list", "listening", "listing", "literate", "literature", "litigation", "litmus", "litter", "littleneck", +"liver", "livestock", "living", "lizard", "llama", "load", "loading", "loaf", "loafer", "loan", "lobby", "lobotomy", 
"lobster", "local", +"locality", "location", "lock", "locker", "locket", "locomotive", "locust", "lode", "loft", "log", "loggia", "logic", "login", "logistics", +"logo", "loincloth", "lollipop", "loneliness", "longboat", "longitude", "look", "lookout", "loop", "loophole", "loquat", "lord", "loss", +"lot", "lotion", "lottery", "lounge", "louse", "lout", "love", "lover", "lox", "loyalty", "luck", "luggage", "lumber", "lumberman", "lunch", +"luncheonette", "lunchmeat", "lunchroom", "lung", "lunge", "lust", "lute", "luxury", "lychee", "lycra", "lye", "lymphocyte", "lynx", +"lyocell", "lyre", "lyrics", "lysine", "mRNA", "macadamia", "macaroni", "macaroon", "macaw", "machine", "machinery", "macrame", "macro", +"macrofauna", "madam", "maelstrom", "maestro", "magazine", "maggot", "magic", "magnet", "magnitude", "maid", "maiden", "mail", "mailbox", +"mailer", "mailing", "mailman", "main", "mainland", "mainstream", "maintainer", "maintenance", "maize", "major", "majority", +"makeover", "maker", "makeup", "making", "male", "malice", "mall", "mallard", "mallet", "malnutrition", "mama", "mambo", "mammoth", "man", +"manacle", "management", "manager", "manatee", "mandarin", "mandate", "mandolin", "mangle", "mango", "mangrove", "manhunt", "maniac", +"manicure", "manifestation", "manipulation", "mankind", "manner", "manor", "mansard", "manservant", "mansion", "mantel", "mantle", "mantua", +"manufacturer", "manufacturing", "many", "map", "maple", "mapping", "maracas", "marathon", "marble", "march", "mare", "margarine", "margin", +"mariachi", "marimba", "marines", "marionberry", "mark", "marker", "market", "marketer", "marketing", "marketplace", "marksman", "markup", +"marmalade", "marriage", "marsh", "marshland", "marshmallow", "marten", "marxism", "mascara", "mask", "masonry", "mass", "massage", "mast", +"master", "masterpiece", "mastication", "mastoid", "mat", "match", "matchmaker", "mate", "material", "maternity", "math", "mathematics", +"matrix", "matter", "mattock", "mattress", "max", "maximum", "maybe", "mayonnaise", "mayor", "meadow", "meal", "mean", "meander", "meaning", +"means", "meantime", "measles", "measure", "measurement", "meat", "meatball", "meatloaf", "mecca", "mechanic", "mechanism", "med", "medal", +"media", "median", "medication", "medicine", "medium", "meet", "meeting", "melatonin", "melody", "melon", "member", "membership", "membrane", +"meme", "memo", "memorial", "memory", "men", "menopause", "menorah", "mention", "mentor", "menu", "merchandise", "merchant", "mercury", +"meridian", "meringue", "merit", "mesenchyme", "mess", "message", "messenger", "messy", "metabolite", "metal", "metallurgist", "metaphor", +"meteor", "meteorology", "meter", "methane", "method", "methodology", "metric", "metro", "metronome", "mezzanine", "microlending", "micronutrient", +"microphone", "microwave", "midden", "middle", "middleman", "midline", "midnight", "midwife", "might", "migrant", "migration", +"mile", "mileage", "milepost", "milestone", "military", "milk", "milkshake", "mill", "millennium", "millet", "millimeter", "million", +"millisecond", "millstone", "mime", "mimosa", "min", "mincemeat", "mind", "mine", "mineral", "mineshaft", "mini", "minibus", +"minimalism", "minimum", "mining", "minion", "minister", "mink", "minnow", "minor", "minority", "mint", "minute", "miracle", +"mirror", "miscarriage", "miscommunication", "misfit", "misnomer", "misogyny", "misplacement", "misreading", "misrepresentation", "miss", +"missile", "mission", "missionary", "mist", "mistake", "mister", "misunderstand", "miter", 
"mitten", "mix", "mixer", "mixture", "moai", +"moat", "mob", "mobile", "mobility", "mobster", "moccasins", "mocha", "mochi", "mode", "model", "modeling", "modem", "modernist", "modernity", +"modification", "molar", "molasses", "molding", "mole", "molecule", "mom", "moment", "monastery", "monasticism", "money", "monger", "monitor", +"monitoring", "monk", "monkey", "monocle", "monopoly", "monotheism", "monsoon", "monster", "month", "monument", "mood", "moody", "moon", +"moonlight", "moonscape", "moonshine", "moose", "mop", "morale", "morbid", "morbidity", "morning", "moron", "morphology", "morsel", "mortal", +"mortality", "mortgage", "mortise", "mosque", "mosquito", "most", "motel", "moth", "mother", "motion", "motivation", +"motive", "motor", "motorboat", "motorcar", "motorcycle", "mound", "mountain", "mouse", "mouser", "mousse", "moustache", "mouth", "mouton", +"movement", "mover", "movie", "mower", "mozzarella", "mud", "muffin", "mug", "mukluk", "mule", "multimedia", "murder", "muscat", "muscatel", +"muscle", "musculature", "museum", "mushroom", "music", "musician", "muskrat", "mussel", "mustache", "mustard", +"mutation", "mutt", "mutton", "mycoplasma", "mystery", "myth", "mythology", "nail", "name", "naming", "nanoparticle", "napkin", "narrative", +"nasal", "nation", "nationality", "native", "naturalisation", "nature", "navigation", "necessity", "neck", "necklace", "necktie", "nectar", +"nectarine", "need", "needle", "neglect", "negligee", "negotiation", "neighbor", "neighborhood", "neighbour", "neighbourhood", "neologism", +"neon", "neonate", "nephew", "nerve", "nest", "nestling", "nestmate", "net", "netball", "netbook", "netsuke", "network", "networking", +"neurobiologist", "neuron", "neuropathologist", "neuropsychiatry", "news", "newsletter", "newspaper", "newsprint", "newsstand", "nexus", +"nibble", "nicety", "niche", "nick", "nickel", "nickname", "niece", "night", "nightclub", "nightgown", "nightingale", "nightlife", "nightlight", +"nightmare", "ninja", "nit", "nitrogen", "nobody", "nod", "node", "noir", "noise", "nonbeliever", "nonconformist", "nondisclosure", "nonsense", +"noodle", "noodles", "noon", "norm", "normal", "normalisation", "normalization", "north", "nose", "notation", "note", "notebook", "notepad", +"nothing", "notice", "notion", "notoriety", "nougat", "noun", "nourishment", "novel", "nucleotidase", "nucleotide", "nudge", "nuke", +"number", "numeracy", "numeric", "numismatist", "nun", "nurse", "nursery", "nursing", "nurture", "nut", "nutmeg", "nutrient", "nutrition", +"nylon", "nymph", "oak", "oar", "oasis", "oat", "oatmeal", "oats", "obedience", "obesity", "obi", "object", "objection", "objective", +"obligation", "oboe", "observation", "observatory", "obsession", "obsidian", "obstacle", "occasion", "occupation", "occurrence", "ocean", +"ocelot", "octagon", "octave", "octavo", "octet", "octopus", "odometer", "odyssey", "oeuvre", "offence", "offense", "offer", +"offering", "office", "officer", "official", "offset", "oil", "okra", "oldie", "oleo", "olive", "omega", "omelet", "omission", "omnivore", +"oncology", "onion", "online", "onset", "opening", "opera", "operating", "operation", "operator", "ophthalmologist", "opinion", "opium", +"opossum", "opponent", "opportunist", "opportunity", "opposite", "opposition", "optimal", "optimisation", "optimist", "optimization", +"option", "orange", "orangutan", "orator", "orchard", "orchestra", "orchid", "order", "ordinary", "ordination", "ore", "oregano", "organ", +"organisation", "organising", "organization", "organizing", 
"orient", "orientation", "origin", "original", "originality", "ornament", +"osmosis", "osprey", "ostrich", "other", "otter", "ottoman", "ounce", "outback", "outcome", "outfielder", "outfit", "outhouse", "outlaw", +"outlay", "outlet", "outline", "outlook", "output", "outrage", "outrigger", "outrun", "outset", "outside", "oval", "ovary", "oven", "overcharge", +"overclocking", "overcoat", "overexertion", "overflight", "overhead", "overheard", "overload", "overnighter", "overshoot", "oversight", +"overview", "overweight", "owl", "owner", "ownership", "ox", "oxford", "oxygen", "oyster", "ozone", "pace", "pacemaker", "pack", "package", +"packaging", "packet", "pad", "paddle", "paddock", "pagan", "page", "pagoda", "pail", "pain", "paint", "painter", "painting", "paintwork", +"pair", "pajamas", "palace", "palate", "palm", "pamphlet", "pan", "pancake", "pancreas", "panda", "panel", "panic", "pannier", "panpipe", +"pansy", "panther", "panties", "pantologist", "pantology", "pantry", "pants", "pantsuit", "panty", "pantyhose", "papa", "papaya", "paper", +"paperback", "paperwork", "parable", "parachute", "parade", "paradise", "paragraph", "parallelogram", "paramecium", "paramedic", "parameter", +"paranoia", "parcel", "parchment", "pard", "pardon", "parent", "parenthesis", "parenting", "park", "parka", "parking", "parliament", +"parole", "parrot", "parser", "parsley", "parsnip", "part", "participant", "participation", "particle", "particular", "partner", "partnership", +"partridge", "party", "pass", "passage", "passbook", "passenger", "passing", "passion", "passive", "passport", "password", "past", "pasta", +"paste", "pastor", "pastoralist", "pastry", "pasture", "pat", "patch", "pate", "patent", "patentee", "path", "pathogenesis", "pathology", +"pathway", "patience", "patient", "patina", "patio", "patriarch", "patrimony", "patriot", "patrol", "patroller", "patrolling", "patron", +"pattern", "patty", "pattypan", "pause", "pavement", "pavilion", "paw", "pawnshop", "pay", "payee", "payment", "payoff", "pea", "peace", +"peach", "peacoat", "peacock", "peak", "peanut", "pear", "pearl", "peasant", "pecan", "pecker", "pedal", "peek", "peen", "peer", +"pegboard", "pelican", "pelt", "pen", "penalty", "pence", "pencil", "pendant", "pendulum", "penguin", "penicillin", "peninsula", "penis", +"pennant", "penny", "pension", "pentagon", "peony", "people", "pepper", "pepperoni", "percent", "percentage", "perception", "perch", +"perennial", "perfection", "performance", "perfume", "period", "periodical", "peripheral", "permafrost", "permission", "permit", "perp", +"perpendicular", "persimmon", "person", "personal", "personality", "personnel", "perspective", "pest", "pet", "petal", "petition", "petitioner", +"petticoat", "pew", "pharmacist", "pharmacopoeia", "phase", "pheasant", "phenomenon", "phenotype", "pheromone", "philanthropy", "philosopher", +"philosophy", "phone", "phosphate", "photo", "photodiode", "photograph", "photographer", "photography", "photoreceptor", "phrase", "phrasing", +"physical", "physics", "physiology", "pianist", "piano", "piccolo", "pick", "pickax", "pickaxe", "picket", "pickle", "pickup", "picnic", +"picture", "picturesque", "pie", "piece", "pier", "piety", "pig", "pigeon", "piglet", "pigpen", "pigsty", "pike", "pilaf", "pile", "pilgrim", +"pilgrimage", "pill", "pillar", "pillbox", "pillow", "pilot", "pimp", "pimple", "pin", "pinafore", "pine", "pineapple", +"pinecone", "ping", "pink", "pinkie", "pinot", "pinstripe", "pint", "pinto", "pinworm", "pioneer", "pipe", "pipeline", "piracy", "pirate", 
+"pistol", "pit", "pita", "pitch", "pitcher", "pitching", "pith", "pizza", "place", "placebo", "placement", "placode", "plagiarism", +"plain", "plaintiff", "plan", "plane", "planet", "planning", "plant", "plantation", "planter", "planula", "plaster", "plasterboard", +"plastic", "plate", "platelet", "platform", "platinum", "platter", "platypus", "play", "player", "playground", "playroom", "playwright", +"plea", "pleasure", "pleat", "pledge", "plenty", "plier", "pliers", "plight", "plot", "plough", "plover", "plow", "plowman", "plug", +"plugin", "plum", "plumber", "plume", "plunger", "plywood", "pneumonia", "pocket", "pocketbook", "pod", "podcast", "poem", +"poet", "poetry", "poignance", "point", "poison", "poisoning", "poker", "polarisation", "polarization", "pole", "polenta", "police", +"policeman", "policy", "polish", "politician", "politics", "poll", "polliwog", "pollutant", "pollution", "polo", "polyester", "polyp", +"pomegranate", "pomelo", "pompom", "poncho", "pond", "pony", "pool", "poor", "pop", "popcorn", "poppy", "popsicle", "popularity", "population", +"populist", "porcelain", "porch", "porcupine", "pork", "porpoise", "port", "porter", "portfolio", "porthole", "portion", "portrait", +"position", "possession", "possibility", "possible", "post", "postage", "postbox", "poster", "posterior", "postfix", "pot", "potato", +"potential", "pottery", "potty", "pouch", "poultry", "pound", "pounding", "poverty", "powder", "power", "practice", "practitioner", "prairie", +"praise", "pray", "prayer", "precedence", "precedent", "precipitation", "precision", "predecessor", "preface", "preference", "prefix", +"pregnancy", "prejudice", "prelude", "premeditation", "premier", "premise", "premium", "preoccupation", "preparation", "prescription", +"presence", "present", "presentation", "preservation", "preserves", "presidency", "president", "press", "pressroom", "pressure", "pressurisation", +"pressurization", "prestige", "presume", "pretzel", "prevalence", "prevention", "prey", "price", "pricing", "pride", "priest", "priesthood", +"primary", "primate", "prince", "princess", "principal", "principle", "print", "printer", "printing", "prior", "priority", "prison", +"prisoner", "privacy", "private", "privilege", "prize", "prizefight", "probability", "probation", "probe", "problem", "procedure", "proceedings", +"process", "processing", "processor", "proctor", "procurement", "produce", "producer", "product", "production", "productivity", "profession", +"professional", "professor", "profile", "profit", "progenitor", "program", "programme", "programming", "progress", "progression", "prohibition", +"project", "proliferation", "promenade", "promise", "promotion", "prompt", "pronoun", "pronunciation", "proof", "propaganda", +"propane", "property", "prophet", "proponent", "proportion", "proposal", "proposition", "proprietor", "prose", "prosecution", "prosecutor", +"prospect", "prosperity", "prostacyclin", "prostanoid", "prostrate", "protection", "protein", "protest", "protocol", "providence", "provider", +"province", "provision", "prow", "proximal", "proximity", "prune", "pruner", "pseudocode", "pseudoscience", "psychiatrist", "psychoanalyst", +"psychologist", "psychology", "ptarmigan", "pub", "public", "publication", "publicity", "publisher", "publishing", "pudding", "puddle", +"puffin", "pug", "puggle", "pulley", "pulse", "puma", "pump", "pumpernickel", "pumpkin", "pumpkinseed", "pun", "punch", "punctuation", +"punishment", "pup", "pupa", "pupil", "puppet", "puppy", "purchase", "puritan", "purity", "purple", 
"purpose", "purr", "purse", "pursuit", +"push", "pusher", "put", "puzzle", "pyramid", "pyridine", "quadrant", "quail", "qualification", "quality", "quantity", "quart", "quarter", +"quartet", "quartz", "queen", "query", "quest", "question", "questioner", "questionnaire", "quiche", "quicksand", "quiet", "quill", "quilt", +"quince", "quinoa", "quit", "quiver", "quota", "quotation", "quote", "rabbi", "rabbit", "raccoon", "race", "racer", "racing", "racism", +"racist", "rack", "radar", "radiator", "radio", "radiosonde", "radish", "raffle", "raft", "rag", "rage", "raid", "rail", "railing", "railroad", +"railway", "raiment", "rain", "rainbow", "raincoat", "rainmaker", "rainstorm", "rainy", "raise", "raisin", "rake", "rally", "ram", "rambler", +"ramen", "ramie", "ranch", "rancher", "randomisation", "randomization", "range", "ranger", "rank", "rap", "rape", "raspberry", "rat", +"rate", "ratepayer", "rating", "ratio", "rationale", "rations", "raven", "ravioli", "rawhide", "ray", "rayon", "razor", "reach", "reactant", +"reaction", "read", "reader", "readiness", "reading", "real", "reality", "realization", "realm", "reamer", "rear", "reason", "reasoning", +"rebel", "rebellion", "reboot", "recall", "recapitulation", "receipt", "receiver", "reception", "receptor", "recess", "recession", "recipe", +"recipient", "reciprocity", "reclamation", "recliner", "recognition", "recollection", "recommendation", "reconsideration", "record", +"recorder", "recording", "recovery", "recreation", "recruit", "rectangle", "red", "redesign", "redhead", "redirect", "rediscovery", "reduction", +"reef", "refectory", "reference", "referendum", "reflection", "reform", "refreshments", "refrigerator", "refuge", "refund", "refusal", +"refuse", "regard", "regime", "region", "regionalism", "register", "registration", "registry", "regret", "regulation", "regulator", +"rehospitalization", "reindeer", "reinscription", "reject", "relation", "relationship", "relative", "relaxation", "relay", "release", +"reliability", "relief", "religion", "relish", "reluctance", "remains", "remark", "reminder", "remnant", "remote", "removal", "renaissance", +"rent", "reorganisation", "reorganization", "repair", "reparation", "repayment", "repeat", "replacement", "replica", "replication", "reply", +"report", "reporter", "reporting", "repository", "representation", "representative", "reprocessing", "republic", "republican", "reputation", +"request", "requirement", "resale", "rescue", "research", "researcher", "resemblance", "reservation", "reserve", "reservoir", "reset", +"residence", "resident", "residue", "resist", "resistance", "resolution", "resolve", "resort", "resource", "respect", "respite", "response", +"responsibility", "rest", "restaurant", "restoration", "restriction", "restroom", "restructuring", "result", "resume", "retailer", "retention", +"rethinking", "retina", "retirement", "retouching", "retreat", "retrospect", "retrospective", "retrospectivity", "return", "reunion", +"revascularisation", "revascularization", "reveal", "revelation", "revenant", "revenge", "revenue", "reversal", "reverse", "review", +"revitalisation", "revitalization", "revival", "revolution", "revolver", "reward", "rhetoric", "rheumatism", "rhinoceros", "rhubarb", +"rhyme", "rhythm", "rib", "ribbon", "rice", "riddle", "ride", "rider", "ridge", "riding", "rifle", "right", "rim", "ring", "ringworm", +"riot", "rip", "ripple", "rise", "riser", "risk", "rite", "ritual", "river", "riverbed", "rivulet", "road", "roadway", "roar", "roast", +"robe", "robin", "robot", "robotics", 
"rock", "rocker", "rocket", "rod", "role", "roll", "roller", "romaine", "romance", +"roof", "room", "roommate", "rooster", "root", "rope", "rose", "rosemary", "roster", "rostrum", "rotation", "round", "roundabout", "route", +"router", "routine", "row", "rowboat", "rowing", "rubber", "rubric", "ruby", "ruckus", "rudiment", "ruffle", "rug", "rugby", +"ruin", "rule", "ruler", "ruling", "rum", "rumor", "run", "runaway", "runner", "running", "runway", "rush", "rust", "rutabaga", "rye", +"sabre", "sac", "sack", "saddle", "sadness", "safari", "safe", "safeguard", "safety", "saffron", "sage", "sail", "sailboat", "sailing", +"sailor", "saint", "sake", "salad", "salami", "salary", "sale", "salesman", "salmon", "salon", "saloon", "salsa", "salt", "salute", "samovar", +"sampan", "sample", "samurai", "sanction", "sanctity", "sanctuary", "sand", "sandal", "sandbar", "sandpaper", "sandwich", "sanity", "sardine", +"sari", "sarong", "sash", "satellite", "satin", "satire", "satisfaction", "sauce", "saucer", "sauerkraut", "sausage", "savage", "savannah", +"saving", "savings", "savior", "saviour", "savory", "saw", "saxophone", "scaffold", "scale", "scallion", "scallops", "scalp", "scam", +"scanner", "scarecrow", "scarf", "scarification", "scenario", "scene", "scenery", "scent", "schedule", "scheduling", "schema", "scheme", +"schizophrenic", "schnitzel", "scholar", "scholarship", "school", "schoolhouse", "schooner", "science", "scientist", "scimitar", "scissors", +"scooter", "scope", "score", "scorn", "scorpion", "scotch", "scout", "scow", "scrambled", "scrap", "scraper", "scratch", "screamer", +"screen", "screening", "screenwriting", "screw", "screwdriver", "scrim", "scrip", "script", "scripture", "scrutiny", "sculpting", +"sculptural", "sculpture", "sea", "seabass", "seafood", "seagull", "seal", "seaplane", "search", "seashore", "seaside", "season", "seat", +"seaweed", "second", "secrecy", "secret", "secretariat", "secretary", "secretion", "section", "sectional", "sector", "security", "sediment", +"seed", "seeder", "seeker", "seep", "segment", "seizure", "selection", "self", "seller", +"selling", "semantics", "semester", "semicircle", "semicolon", "semiconductor", "seminar", "senate", "senator", "sender", "senior", "sense", +"sensibility", "sensitive", "sensitivity", "sensor", "sentence", "sentencing", "sentiment", "sepal", "separation", "septicaemia", "sequel", +"sequence", "serial", "series", "sermon", "serum", "serval", "servant", "server", "service", "servitude", "sesame", "session", "set", +"setback", "setting", "settlement", "settler", "severity", "sewer", "sex", "sexuality", "shack", "shackle", "shade", "shadow", "shadowbox", +"shakedown", "shaker", "shallot", "shallows", "shame", "shampoo", "shanty", "shape", "share", "shareholder", "shark", "shaw", "shawl", +"shear", "shearling", "sheath", "shed", "sheep", "sheet", "shelf", "shell", "shelter", "sherbet", "sherry", "shield", "shift", "shin", +"shine", "shingle", "ship", "shipper", "shipping", "shipyard", "shirt", "shirtdress", "shoat", "shock", "shoe", +"shoehorn", "shoelace", "shoemaker", "shoes", "shoestring", "shofar", "shoot", "shootdown", "shop", "shopper", "shopping", "shore", "shoreline", +"short", "shortage", "shorts", "shortwave", "shot", "shoulder", "shout", "shovel", "show", "shower", "shred", "shrimp", +"shrine", "shutdown", "sibling", "sick", "sickness", "side", "sideboard", "sideburns", "sidecar", "sidestream", "sidewalk", "siding", +"siege", "sigh", "sight", "sightseeing", "sign", "signal", "signature", "signet", "significance", 
"signify", "signup", "silence", "silica", +"silicon", "silk", "silkworm", "sill", "silly", "silo", "silver", "similarity", "simple", "simplicity", "simplification", "simvastatin", +"sin", "singer", "singing", "singular", "sink", "sinuosity", "sip", "sir", "sister", "sitar", "site", "situation", "size", +"skate", "skating", "skean", "skeleton", "ski", "skiing", "skill", "skin", "skirt", "skull", "skullcap", "skullduggery", "skunk", "sky", +"skylight", "skyline", "skyscraper", "skywalk", "slang", "slapstick", "slash", "slate", "slavery", "slaw", "sled", "sledge", +"sleep", "sleepiness", "sleeping", "sleet", "sleuth", "slice", "slide", "slider", "slime", "slip", "slipper", "slippers", "slope", "slot", +"sloth", "slump", "smell", "smelting", "smile", "smith", "smock", "smog", "smoke", "smoking", "smolt", "smuggling", "snack", "snail", +"snake", "snakebite", "snap", "snarl", "sneaker", "sneakers", "sneeze", "sniffle", "snob", "snorer", "snow", "snowboarding", "snowflake", +"snowman", "snowmobiling", "snowplow", "snowstorm", "snowsuit", "snuck", "snug", "snuggle", "soap", "soccer", "socialism", "socialist", +"society", "sociology", "sock", "socks", "soda", "sofa", "softball", "softdrink", "softening", "software", "soil", "soldier", "sole", +"solicitation", "solicitor", "solidarity", "solidity", "soliloquy", "solitaire", "solution", "solvency", "sombrero", "somebody", "someone", +"someplace", "somersault", "something", "somewhere", "son", "sonar", "sonata", "song", "songbird", "sonnet", "soot", "sophomore", "soprano", +"sorbet", "sorghum", "sorrel", "sorrow", "sort", "soul", "soulmate", "sound", "soundness", "soup", "source", "sourwood", "sousaphone", +"south", "southeast", "souvenir", "sovereignty", "sow", "soy", "soybean", "space", "spacing", "spade", "spaghetti", "span", "spandex", +"spank", "sparerib", "spark", "sparrow", "spasm", "spat", "spatula", "spawn", "speaker", "speakerphone", "speaking", "spear", "spec", +"special", "specialist", "specialty", "species", "specification", "spectacle", "spectacles", "spectrograph", "spectrum", "speculation", +"speech", "speed", "speedboat", "spell", "spelling", "spelt", "spending", "sphere", "sphynx", "spice", "spider", "spiderling", "spike", +"spill", "spinach", "spine", "spiral", "spirit", "spiritual", "spirituality", "spit", "spite", "spleen", "splendor", "split", "spokesman", +"spokeswoman", "sponge", "sponsor", "sponsorship", "spool", "spoon", "spork", "sport", "sportsman", "spot", "spotlight", "spouse", "sprag", +"sprat", "spray", "spread", "spreadsheet", "spree", "spring", "sprinkles", "sprinter", "sprout", "spruce", "spud", "spume", "spur", "spy", +"spyglass", "square", "squash", "squatter", "squeegee", "squid", "squirrel", "stab", "stability", "stable", "stack", "stacking", "stadium", +"staff", "stag", "stage", "stain", "stair", "staircase", "stake", "stalk", "stall", "stallion", "stamen", "stamina", "stamp", "stance", +"stand", "standard", "standardisation", "standardization", "standing", "standoff", "standpoint", "star", "starboard", "start", "starter", +"state", "statement", "statin", "station", "statistic", "statistics", "statue", "status", "statute", "stay", "steak", +"stealth", "steam", "steamroller", "steel", "steeple", "stem", "stench", "stencil", "step", +"stepdaughter", "stepmother", +"stepson", "stereo", "stew", "steward", "stick", "sticker", "stiletto", "still", "stimulation", "stimulus", "sting", +"stinger", "stitch", "stitcher", "stock", "stockings", "stole", "stomach", "stone", "stonework", "stool", +"stop", "stopsign", 
"stopwatch", "storage", "store", "storey", "storm", "story", "storyboard", "stot", "stove", "strait", +"strand", "stranger", "strap", "strategy", "straw", "strawberry", "strawman", "stream", "street", "streetcar", "strength", "stress", +"stretch", "strife", "strike", "string", "strip", "stripe", "strobe", "stroke", "structure", "strudel", "struggle", "stucco", "stud", +"student", "studio", "study", "stuff", "stumbling", "stump", "stupidity", "sturgeon", "sty", "style", "styling", "stylus", "sub", "subcomponent", +"subconscious", "subcontractor", "subexpression", "subgroup", "subject", "submarine", "submitter", "subprime", "subroutine", "subscription", +"subsection", "subset", "subsidence", "subsidiary", "subsidy", "substance", "substitution", "subtitle", "suburb", "subway", "success", +"succotash", "suck", "sucker", "suede", "suet", "suffocation", "sugar", "suggestion", "suicide", "suit", "suitcase", "suite", "sulfur", +"sultan", "sum", "summary", "summer", "summit", "sun", "sunbeam", "sunbonnet", "sundae", "sunday", "sundial", "sunflower", "sunglasses", +"sunlamp", "sunlight", "sunrise", "sunroom", "sunset", "sunshine", "superiority", "supermarket", "supernatural", "supervision", "supervisor", +"supper", "supplement", "supplier", "supply", "support", "supporter", "suppression", "supreme", "surface", "surfboard", "surge", "surgeon", +"surgery", "surname", "surplus", "surprise", "surround", "surroundings", "surrounds", "survey", "survival", "survivor", "sushi", "suspect", +"suspenders", "suspension", "sustainment", "sustenance", "swallow", "swamp", "swan", "swanling", "swath", "sweat", "sweater", "sweatshirt", +"sweatshop", "sweatsuit", "sweets", "swell", "swim", "swimming", "swimsuit", "swine", "swing", "switch", "switchboard", "switching", +"swivel", "sword", "swordfight", "swordfish", "sycamore", "symbol", "symmetry", "sympathy", "symptom", "syndicate", "syndrome", "synergy", +"synod", "synonym", "synthesis", "syrup", "system", "tab", "tabby", "tabernacle", "table", "tablecloth", "tablet", "tabletop", +"tachometer", "tackle", "taco", "tactics", "tactile", "tadpole", "tag", "tail", "tailbud", "tailor", "tailspin", "takeover", +"tale", "talent", "talk", "talking", "tamale", "tambour", "tambourine", "tan", "tandem", "tangerine", "tank", +"tanker", "tankful", "tap", "tape", "tapioca", "target", "taro", "tarragon", "tart", "task", "tassel", "taste", "tatami", "tattler", +"tattoo", "tavern", "tax", "taxi", "taxicab", "taxpayer", "tea", "teacher", "teaching", "team", "teammate", "teapot", "tear", "tech", +"technician", "technique", "technologist", "technology", "tectonics", "teen", "teenager", "teepee", "telephone", "telescreen", "teletype", +"television", "tell", "teller", "temp", "temper", "temperature", "temple", "tempo", "temporariness", "temporary", "temptation", "temptress", +"tenant", "tendency", "tender", "tenement", "tenet", "tennis", "tenor", "tension", "tensor", "tent", "tentacle", "tenth", "tepee", "teriyaki", +"term", "terminal", "termination", "terminology", "termite", "terrace", "terracotta", "terrapin", "terrarium", "territory", "terror", +"terrorism", "terrorist", "test", "testament", "testimonial", "testimony", "testing", "text", "textbook", "textual", "texture", "thanks", +"thaw", "theater", "theft", "theism", "theme", "theology", "theory", "therapist", "therapy", "thermals", "thermometer", "thermostat", +"thesis", "thickness", "thief", "thigh", "thing", "thinking", "thirst", "thistle", "thong", "thongs", "thorn", "thought", "thousand", +"thread", "threat", "threshold", 
"thrift", "thrill", "throat", "throne", "thrush", "thrust", "thug", "thumb", "thump", "thunder", "thunderbolt", +"thunderhead", "thunderstorm", "thyme", "tiara", "tic", "tick", "ticket", "tide", "tie", "tiger", "tights", "tile", "till", "tilt", "timbale", +"timber", "time", "timeline", "timeout", "timer", "timetable", "timing", "timpani", "tin", "tinderbox", "tinkle", "tintype", "tip", "tire", +"tissue", "titanium", "title", "toad", "toast", "toaster", "tobacco", "today", "toe", "toenail", "toffee", "tofu", "tog", "toga", "toilet", +"tolerance", "tolerant", "toll", "tomatillo", "tomato", "tomb", "tomography", "tomorrow", "ton", "tonality", "tone", "tongue", +"tonic", "tonight", "tool", "toot", "tooth", "toothbrush", "toothpaste", "toothpick", "top", "topic", "topsail", "toque", +"toreador", "tornado", "torso", "torte", "tortellini", "tortilla", "tortoise", "tosser", "total", "tote", "touch", "tour", +"tourism", "tourist", "tournament", "towel", "tower", "town", "townhouse", "township", "toy", "trace", "trachoma", "track", +"tracking", "tracksuit", "tract", "tractor", "trade", "trader", "trading", "tradition", "traditionalism", "traffic", "trafficker", "tragedy", +"trail", "trailer", "trailpatrol", "train", "trainer", "training", "trait", "tram", "tramp", "trance", "transaction", "transcript", "transfer", +"transformation", "transit", "transition", "translation", "transmission", "transom", "transparency", "transplantation", "transport", +"transportation", "trap", "trapdoor", "trapezium", "trapezoid", "trash", "travel", "traveler", "tray", "treasure", "treasury", "treat", +"treatment", "treaty", "tree", "trek", "trellis", "tremor", "trench", "trend", "triad", "trial", "triangle", "tribe", "tributary", "trick", +"trigger", "trigonometry", "trillion", "trim", "trinket", "trip", "tripod", "tritone", "triumph", "trolley", "trombone", "troop", "trooper", +"trophy", "trouble", "trousers", "trout", "trove", "trowel", "truck", "trumpet", "trunk", "trust", "trustee", "truth", "try", "tsunami", +"tub", "tuba", "tube", "tuber", "tug", "tugboat", "tuition", "tulip", "tumbler", "tummy", "tuna", "tune", "tunic", "tunnel", +"turban", "turf", "turkey", "turmeric", "turn", "turning", "turnip", "turnover", "turnstile", "turret", "turtle", "tusk", "tussle", "tutu", +"tuxedo", "tweet", "tweezers", "twig", "twilight", "twine", "twins", "twist", "twister", "twitter", "type", "typeface", "typewriter", +"typhoon", "ukulele", "ultimatum", "umbrella", "unblinking", "uncertainty", "uncle", "underclothes", "underestimate", "underground", +"underneath", "underpants", "underpass", "undershirt", "understanding", "understatement", "undertaker", "underwear", "underweight", "underwire", +"underwriting", "unemployment", "unibody", "uniform", "uniformity", "union", "unique", "unit", "unity", "universe", "university", "update", +"upgrade", "uplift", "upper", "upstairs", "upward", "urge", "urgency", "urn", "usage", "use", "user", "usher", "usual", "utensil", "utilisation", +"utility", "utilization", "vacation", "vaccine", "vacuum", "vagrant", "valance", "valentine", "validate", "validity", "valley", "valuable", +"value", "vampire", "van", "vanadyl", "vane", "vanilla", "vanity", "variability", "variable", "variant", "variation", "variety", "vascular", +"vase", "vault", "vaulting", "veal", "vector", "vegetable", "vegetarian", "vegetarianism", "vegetation", "vehicle", "veil", "vein", "veldt", +"vellum", "velocity", "velodrome", "velvet", "vendor", "veneer", "vengeance", "venison", "venom", "venti", "venture", "venue", "veranda", 
+"verb", "verdict", "verification", "vermicelli", "vernacular", "verse", "version", "vertigo", "verve", "vessel", "vest", "vestment", +"vet", "veteran", "veterinarian", "veto", "viability", "vibe", "vibraphone", "vibration", "vibrissae", "vice", "vicinity", "victim", +"victory", "video", "view", "viewer", "vignette", "villa", "village", "vine", "vinegar", "vineyard", "vintage", "vintner", "vinyl", "viola", +"violation", "violence", "violet", "violin", "virginal", "virtue", "virus", "visa", "viscose", "vise", "vision", "visit", "visitor", +"visor", "vista", "visual", "vitality", "vitamin", "vitro", "vivo", "vixen", "vodka", "vogue", "voice", "void", "vol", "volatility", +"volcano", "volleyball", "volume", "volunteer", "volunteering", "vomit", "vote", "voter", "voting", "voyage", "vulture", "wad", "wafer", +"waffle", "wage", "wagon", "waist", "waistband", "wait", "waiter", "waiting", "waitress", "waiver", "wake", "walk", "walker", "walking", +"walkway", "wall", "wallaby", "wallet", "walnut", "walrus", "wampum", "wannabe", "want", "war", "warden", "wardrobe", "warfare", "warlock", +"warlord", "warming", "warmth", "warning", "warrant", "warren", "warrior", "wasabi", "wash", "washbasin", "washcloth", "washer", +"washtub", "wasp", "waste", "wastebasket", "wasting", "watch", "watcher", "watchmaker", "water", "waterbed", "watercress", "waterfall", +"waterfront", "watermelon", "waterskiing", "waterspout", "waterwheel", "wave", "waveform", "wax", "way", "weakness", "wealth", "weapon", +"wear", "weasel", "weather", "web", "webinar", "webmail", "webpage", "website", "wedding", "wedge", "weed", "weeder", "weedkiller", "week", +"weekend", "weekender", "weight", "weird", "welcome", "welfare", "well", "west", "western", "wetland", "wetsuit", +"whack", "whale", "wharf", "wheat", "wheel", "whelp", "whey", "whip", "whirlpool", "whirlwind", "whisker", "whiskey", "whisper", "whistle", +"white", "whole", "wholesale", "wholesaler", "whorl", "wick", "widget", "widow", "width", "wife", "wifi", "wild", "wildebeest", "wilderness", +"wildlife", "will", "willingness", "willow", "win", "wind", "windage", "window", "windscreen", "windshield", "wine", "winery", +"wing", "wingman", "wingtip", "wink", "winner", "winter", "wire", "wiretap", "wiring", "wisdom", "wiseguy", "wish", "wisteria", "wit", +"witch", "withdrawal", "witness", "wok", "wolf", "woman", "wombat", "wonder", "wont", "wood", "woodchuck", "woodland", +"woodshed", "woodwind", "wool", "woolens", "word", "wording", "work", "workbench", "worker", "workforce", "workhorse", "working", "workout", +"workplace", "workshop", "world", "worm", "worry", "worship", "worshiper", "worth", "wound", "wrap", "wraparound", "wrapper", "wrapping", +"wreck", "wrecker", "wren", "wrench", "wrestler", "wriggler", "wrinkle", "wrist", "writer", "writing", "wrong", "xylophone", "yacht", +"yahoo", "yak", "yam", "yang", "yard", "yarmulke", "yarn", "yawl", "year", "yeast", "yellow", "yellowjacket", "yesterday", "yew", "yin", +"yoga", "yogurt", "yoke", "yolk", "young", "youngster", "yourself", "youth", "yoyo", "yurt", "zampone", "zebra", "zebrafish", "zen", +"zephyr", "zero", "ziggurat", "zinc", "zipper", "zither", "zombie", "zone", "zoo", "zoologist", "zoology", "zucchini" +}; + + +std::string_view obfuscateWord(std::string_view src, WordMap & obfuscate_map, WordSet & used_nouns, SipHash hash_func) +{ + /// Prevent using too many nouns + if (obfuscate_map.size() * 2 > nouns.size()) + throw Exception("Too many unique identifiers in queries", ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS); + + 
std::string_view & mapped = obfuscate_map[src]; + if (!mapped.empty()) + return mapped; + + hash_func.update(src.data(), src.size()); + std::string_view noun = nouns.begin()[hash_func.get64() % nouns.size()]; + + /// Prevent collisions + while (!used_nouns.insert(noun).second) + { + hash_func.update('\0'); + noun = nouns.begin()[hash_func.get64() % nouns.size()]; + } + + mapped = noun; + return mapped; +} + + +void obfuscateIdentifier(std::string_view src, WriteBuffer & result, WordMap & obfuscate_map, WordSet & used_nouns, SipHash hash_func) +{ + /// Find words in form 'snake_case', 'CamelCase' or 'ALL_CAPS'. + + const char * src_pos = src.data(); + const char * src_end = src_pos + src.size(); + + const char * word_begin = src_pos; + bool word_has_alphanumerics = false; + + auto append_word = [&] + { + std::string_view word(word_begin, src_pos - word_begin); + + if (keep_words.count(word)) + { + result.write(word.data(), word.size()); + } + else + { + std::string_view obfuscated_word = obfuscateWord(word, obfuscate_map, used_nouns, hash_func); + + /// Match the style of source word. + bool first_caps = !word.empty() && isUpperAlphaASCII(word[0]); + bool all_caps = first_caps && word.size() >= 2 && isUpperAlphaASCII(word[1]); + + for (size_t i = 0, size = obfuscated_word.size(); i < size; ++i) + { + if (all_caps || (i == 0 && first_caps)) + result.write(toUpperIfAlphaASCII(obfuscated_word[i])); + else + result.write(obfuscated_word[i]); + } + } + + word_begin = src_pos; + word_has_alphanumerics = false; + }; + + while (src_pos < src_end) + { + if (isAlphaNumericASCII(src_pos[0])) + word_has_alphanumerics = true; + + if (word_has_alphanumerics && src_pos[0] == '_') + { + append_word(); + result.write('_'); + ++word_begin; + } + else if (word_has_alphanumerics && isUpperAlphaASCII(src_pos[0]) && isLowerAlphaASCII(src_pos[-1])) /// xX + { + append_word(); + } + + ++src_pos; + } + + if (word_begin < src_pos) + append_word(); +} + + +void obfuscateLiteral(std::string_view src, WriteBuffer & result, SipHash hash_func) +{ + const char * src_pos = src.data(); + const char * src_end = src_pos + src.size(); + + while (src_pos < src_end) + { + /// Date + if (src_pos + strlen("0000-00-00") <= src_end + && isNumericASCII(src_pos[0]) + && isNumericASCII(src_pos[1]) + && isNumericASCII(src_pos[2]) + && isNumericASCII(src_pos[3]) + && src_pos[4] == '-' + && isNumericASCII(src_pos[5]) + && isNumericASCII(src_pos[6]) + && src_pos[7] == '-' + && isNumericASCII(src_pos[8]) + && isNumericASCII(src_pos[9])) + { + DayNum date; + ReadBufferFromMemory in(src_pos, strlen("0000-00-00")); + readDateText(date, in); + + SipHash hash_func_date = hash_func; + + if (date != 0) + { + date += hash_func_date.get64() % 256; + } + + writeDateText(date, result); + src_pos += strlen("0000-00-00"); + + /// DateTime + if (src_pos + strlen(" 00:00:00") <= src_end + && isNumericASCII(src_pos[1]) + && isNumericASCII(src_pos[2]) + && src_pos[3] == ':' + && isNumericASCII(src_pos[4]) + && isNumericASCII(src_pos[5]) + && src_pos[6] == ':' + && isNumericASCII(src_pos[7]) + && isNumericASCII(src_pos[8])) + { + result.write(src_pos[0]); + + hash_func_date.update(src_pos + 1, strlen("00:00:00")); + + uint64_t hash_value = hash_func_date.get64(); + uint32_t new_hour = hash_value % 24; + hash_value /= 24; + uint32_t new_minute = hash_value % 60; + hash_value /= 60; + uint32_t new_second = hash_value % 60; + + result.write('0' + (new_hour / 10)); + result.write('0' + (new_hour % 10)); + result.write(':'); + result.write('0' + (new_minute / 
10)); + result.write('0' + (new_minute % 10)); + result.write(':'); + result.write('0' + (new_second / 10)); + result.write('0' + (new_second % 10)); + + src_pos += strlen(" 00:00:00"); + } + } + else if (isNumericASCII(src_pos[0])) + { + /// Number + if (src_pos[0] == '0' || src_pos[0] == '1') + { + /// Keep zero and one as is. + result.write(src_pos[0]); + ++src_pos; + } + else + { + ReadBufferFromMemory in(src_pos, src_end - src_pos); + uint64_t num; + readIntText(num, in); + SipHash hash_func_num = hash_func; + hash_func_num.update(src_pos, in.count()); + src_pos += in.count(); + + /// Obfuscate number but keep it within same power of two range. + + uint64_t obfuscated = hash_func_num.get64(); + uint64_t log2 = bitScanReverse(num); + + obfuscated = (1ULL << log2) + obfuscated % (1ULL << log2); + writeIntText(obfuscated, result); + } + } + else if (src_pos + 1 < src_end + && (src_pos[0] == 'e' || src_pos[0] == 'E') + && (isNumericASCII(src_pos[1]) || (src_pos[1] == '-' && src_pos + 2 < src_end && isNumericASCII(src_pos[2])))) + { + /// Something like an exponent of floating point number. Keep it as is. + /// But if it looks like a large number, overflow it into 16 bit. + + result.write(src_pos[0]); + ++src_pos; + + ReadBufferFromMemory in(src_pos, src_end - src_pos); + int16_t num; + readIntText(num, in); + writeIntText(num, result); + src_pos += in.count(); + } + else if (isAlphaASCII(src_pos[0])) + { + /// Alphabetial characters + + const char * alpha_end = src_pos + 1; + while (alpha_end < src_end && isAlphaASCII(*alpha_end)) + ++alpha_end; + + hash_func.update(src_pos, alpha_end - src_pos); + pcg64 rng(hash_func.get64()); + + while (src_pos < alpha_end) + { + auto random = rng(); + if (isLowerAlphaASCII(*src_pos)) + result.write('a' + random % 26); + else + result.write('A' + random % 26); + + ++src_pos; + } + } + else if (isASCII(src_pos[0])) + { + /// Punctuation, whitespace and control characters - keep as is. + + result.write(src_pos[0]); + ++src_pos; + } + else if (src_pos[0] <= '\xBF') + { + /// Continuation of UTF-8 sequence. + hash_func.update(src_pos[0]); + uint64_t hash = hash_func.get64(); + + char c = 0x80 + hash % (0xC0 - 0x80); + result.write(c); + + ++src_pos; + } + else + { + /// Start of UTF-8 sequence. + hash_func.update(src_pos[0]); + uint64_t hash = hash_func.get64(); + + if (src_pos[0] < '\xE0') + { + char c = 0xC0 + hash % 32; + result.write(c); + } + else if (src_pos[0] < '\xF0') + { + char c = 0xE0 + hash % 16; + result.write(c); + } + else + { + char c = 0xF0 + hash % 8; + result.write(c); + } + + ++src_pos; + } + } +} + +} + + +void obfuscateQueries( + std::string_view src, + WriteBuffer & result, + WordMap & obfuscate_map, + WordSet & used_nouns, + SipHash hash_func, + KnownIdentifierFunc known_identifier_func) +{ + Lexer lexer(src.data(), src.data() + src.size()); + while (true) + { + Token token = lexer.nextToken(); + std::string_view whole_token(token.begin, token.size()); + + if (token.isEnd()) + break; + + if (token.type == TokenType::BareWord) + { + std::string whole_token_uppercase(whole_token); + Poco::toUpperInPlace(whole_token_uppercase); + + if (keywords.count(whole_token_uppercase) + || known_identifier_func(whole_token)) + { + /// Keep keywords as is. 
+ result.write(token.begin, token.size()); + } + else + { + /// Obfuscate identifiers + obfuscateIdentifier(whole_token, result, obfuscate_map, used_nouns, hash_func); + } + } + else if (token.type == TokenType::QuotedIdentifier) + { + assert(token.size() >= 2); + + /// Write quotes and the obfuscated content inside. + result.write(*token.begin); + obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func); + result.write(token.end[-1]); + } + else if (token.type == TokenType::Number) + { + obfuscateLiteral(whole_token, result, hash_func); + } + else if (token.type == TokenType::StringLiteral) + { + assert(token.size() >= 2); + + result.write(*token.begin); + obfuscateLiteral({token.begin + 1, token.size() - 2}, result, hash_func); + result.write(token.end[-1]); + } + else if (token.type == TokenType::Comment) + { + /// Skip comments - they may contain confidential info. + } + else + { + /// Everyting else is kept as is. + result.write(token.begin, token.size()); + } + } +} + +} + diff --git a/src/Parsers/obfuscateQueries.h b/src/Parsers/obfuscateQueries.h new file mode 100644 index 00000000000..0a192649a92 --- /dev/null +++ b/src/Parsers/obfuscateQueries.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +class WriteBuffer; + +using WordMap = std::unordered_map; +using WordSet = std::unordered_set; +using KnownIdentifierFunc = std::function; + +/** Takes one or multiple queries and obfuscates them by replacing identifiers to pseudorandom words + * and replacing literals to random values, while preserving the structure of the queries and the general sense. + * + * Its intended use case is when the user wants to share their queries for testing and debugging + * but is afraid to disclose the details about their column names, domain area and values of constants. + * + * It can obfuscate multiple queries in consistent fashion - identical names will be transformed to identical results. + * + * The function is not guaranteed to always give correct result or to be secure. It's implemented in "best effort" fashion. + * + * @param src - a string with source queries. + * @param result - where the obfuscated queries will be written. + * @param obfuscate_map - information about substituted identifiers + * (pass empty map at the beginning or reuse it from previous invocation to get consistent result) + * @param used_nouns - information about words used for substitution + * (pass empty set at the beginning or reuse it from previous invocation to get consistent result) + * @param hash_func - hash function that will be used as a pseudorandom source, + * it's recommended to preseed the function before passing here. + * @param known_identifier_func - a function that returns true if identifier is known name + * (of function, aggregate function, etc. that should be kept as is). If it returns false, identifier will be obfuscated. 
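 *
 * A minimal calling sketch (illustrative only; it assumes SipHash is default-constructible,
 * that WriteBufferFromOwnString is a suitable result buffer, and that KnownIdentifierFunc is
 * std::function<bool(std::string_view)> -- check the declarations above if in doubt):
 *
 *     WordMap obfuscate_map;
 *     WordSet used_nouns;
 *     SipHash hash_func;
 *     hash_func.update("some seed", 9);   // preseed so different users get different mappings
 *     WriteBufferFromOwnString result;
 *     obfuscateQueries("SELECT secret_column FROM user_visits", result,
 *                      obfuscate_map, used_nouns, hash_func,
 *                      [](std::string_view name) { return name == "count"; });
 *
 * Reusing the same obfuscate_map, used_nouns and hash_func across calls keeps the
 * substitution consistent between queries, as described above.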
+ */ +void obfuscateQueries( + std::string_view src, + WriteBuffer & result, + WordMap & obfuscate_map, + WordSet & used_nouns, + SipHash hash_func, + KnownIdentifierFunc known_identifier_func); + +} diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index 0a0c301b722..4ec97b8b55b 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -85,6 +85,7 @@ SRCS( MySQL/ASTDeclareReference.cpp MySQL/ASTDeclareSubPartition.cpp MySQL/ASTDeclareTableOptions.cpp + obfuscateQueries.cpp parseDatabaseAndTableName.cpp parseIdentifierOrStringLiteral.cpp parseIntervalKind.cpp diff --git a/src/Processors/ForkProcessor.cpp b/src/Processors/ForkProcessor.cpp index 7fa21c4236d..9b17f8ad5ca 100644 --- a/src/Processors/ForkProcessor.cpp +++ b/src/Processors/ForkProcessor.cpp @@ -10,7 +10,6 @@ ForkProcessor::Status ForkProcessor::prepare() /// Check can output. - bool all_finished = true; bool all_can_push = true; size_t num_active_outputs = 0; @@ -18,7 +17,6 @@ ForkProcessor::Status ForkProcessor::prepare() { if (!output.isFinished()) { - all_finished = false; ++num_active_outputs; /// The order is important. @@ -27,7 +25,7 @@ ForkProcessor::Status ForkProcessor::prepare() } } - if (all_finished) + if (0 == num_active_outputs) { input.close(); return Status::Finished; diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 42fa764f011..bc57803152f 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include @@ -8,17 +10,22 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; extern const int INCORRECT_DATA; } JSONAsStringRowInputFormat::JSONAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) : IRowInputFormat(header_, in_, std::move(params_)), buf(in) { - if (header_.columns() > 1 || header_.getDataTypes()[0]->getTypeId() != TypeIndex::String) - { - throw Exception("This input format is only suitable for tables with a single column of type String.", ErrorCodes::LOGICAL_ERROR); - } + if (header_.columns() > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "This input format is only suitable for tables with a single column of type String but the number of columns is {}", + header_.columns()); + + if (!isString(removeNullable(removeLowCardinality(header_.getByPosition(0).type)))) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "This input format is only suitable for tables with a single column of type String but the column type is {}", + header_.getByPosition(0).type->getName()); } void JSONAsStringRowInputFormat::resetParser() diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp index befc635386f..8f5eee4bb1b 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -36,7 +36,7 @@ void LineAsStringRowInputFormat::readLineObject(IColumn & column) while (newline) { - pos = find_first_symbols<'\n', '\\'>(buf.position(), buf.buffer().end()); + pos = find_first_symbols<'\n'>(buf.position(), buf.buffer().end()); buf.position() = pos; if (buf.position() == buf.buffer().end()) { @@ -47,12 +47,6 @@ void LineAsStringRowInputFormat::readLineObject(IColumn & column) { newline = false; } - else if (*buf.position() == '\\') - { - ++buf.position(); - if 
(!buf.eof()) - ++buf.position(); - } } buf.makeContinuousMemoryFromCheckpointToPos(); @@ -64,10 +58,12 @@ void LineAsStringRowInputFormat::readLineObject(IColumn & column) bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) { - if (!buf.eof()) - readLineObject(*columns[0]); + if (buf.eof()) + return false; - return !buf.eof(); + readLineObject(*columns[0]); + + return true; } void registerInputFormatProcessorLineAsString(FormatFactory & factory) @@ -82,4 +78,30 @@ void registerInputFormatProcessorLineAsString(FormatFactory & factory) }); } +static bool fileSegmentationEngineLineAsStringpImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) +{ + char * pos = in.position(); + bool need_more_data = true; + + while (loadAtPosition(in, memory, pos) && need_more_data) + { + pos = find_first_symbols<'\n'>(pos, in.buffer().end()); + if (pos == in.buffer().end()) + continue; + + if (memory.size() + static_cast(pos - in.position()) >= min_chunk_size) + need_more_data = false; + + ++pos; + } + + saveUpToPosition(in, memory, pos); + return loadAtPosition(in, memory, pos); +} + +void registerFileSegmentationEngineLineAsString(FormatFactory & factory) +{ + factory.registerFileSegmentationEngine("LineAsString", &fileSegmentationEngineLineAsStringpImpl); +} + } diff --git a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp new file mode 100644 index 00000000000..bd9eaada05e --- /dev/null +++ b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.cpp @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +RawBLOBRowInputFormat::RawBLOBRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) + : IRowInputFormat(header_, in_, std::move(params_)) +{ + if (header_.columns() > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "This input format is only suitable for tables with a single column of type String but the number of columns is {}", + header_.columns()); + + if (!isString(removeNullable(removeLowCardinality(header_.getByPosition(0).type)))) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "This input format is only suitable for tables with a single column of type String but the column type is {}", + header_.getByPosition(0).type->getName()); +} + +bool RawBLOBRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) +{ + if (in.eof()) + return false; + + /// One excessive copy. + String blob; + readStringUntilEOF(blob, in); + columns.at(0)->insertData(blob.data(), blob.size()); + return false; +} + +void registerInputFormatProcessorRawBLOB(FormatFactory & factory) +{ + factory.registerInputFormatProcessor("RawBLOB", []( + ReadBuffer & buf, + const Block & sample, + const RowInputFormatParams & params, + const FormatSettings &) + { + return std::make_shared(sample, buf, params); + }); +} + +} + diff --git a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h new file mode 100644 index 00000000000..fd2c849687a --- /dev/null +++ b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h @@ -0,0 +1,24 @@ +#pragma once + +#include + + +namespace DB +{ + +class ReadBuffer; + +/// This format slurps all input data into single value. +/// This format can only parse a table with single field of type String or similar. 
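/// The whole input is consumed in one go (readRow above calls readStringUntilEOF),
/// so parsing always produces a single row holding one String value.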
+ +class RawBLOBRowInputFormat : public IRowInputFormat +{ +public: + RawBLOBRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_); + + bool readRow(MutableColumns & columns, RowReadExtension &) override; + String getName() const override { return "RawBLOBRowInputFormat"; } +}; + +} + diff --git a/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp new file mode 100644 index 00000000000..786edce5edc --- /dev/null +++ b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp @@ -0,0 +1,38 @@ +#include +#include +#include + +namespace DB +{ + + +RawBLOBRowOutputFormat::RawBLOBRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + FormatFactory::WriteCallback callback) + : IRowOutputFormat(header_, out_, callback) +{ +} + + +void RawBLOBRowOutputFormat::writeField(const IColumn & column, const IDataType &, size_t row_num) +{ + StringRef value = column.getDataAt(row_num); + out.write(value.data, value.size); +} + + +void registerOutputFormatProcessorRawBLOB(FormatFactory & factory) +{ + factory.registerOutputFormatProcessor("RawBLOB", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback callback, + const FormatSettings &) + { + return std::make_shared(buf, sample, callback); + }); +} + +} + diff --git a/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h new file mode 100644 index 00000000000..b6c13597d4b --- /dev/null +++ b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class WriteBuffer; + + +/** This format only allows to output columns of type String + * or types that have contiguous representation in memory. + * They are output as raw bytes without any delimiters or escaping. + * + * The difference between RawBLOB and TSVRaw: + * - data is output in binary, no escaping; + * - no delimiters between values; + * - no newline at the end of each value. + * + * The difference between RawBLOB and RowBinary: + * - strings are output without their lengths. + * + * If you are output more than one value, the output format is ambiguous and you may not be able to read data back. 
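 *
 * For a table with a single String column and exactly one row (for instance
 * SELECT blob FROM t FORMAT RawBLOB), the output should round-trip byte-for-byte
 * with the RawBLOB input format described earlier.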
+ */ +class RawBLOBRowOutputFormat : public IRowOutputFormat +{ +public: + RawBLOBRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + FormatFactory::WriteCallback callback); + + String getName() const override { return "RawBLOBRowOutputFormat"; } + + void writeField(const IColumn & column, const IDataType &, size_t row_num) override; +}; + +} + diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index 7763de1642d..d193a502f2e 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -51,6 +51,8 @@ RegexpRowInputFormat::ColumnFormat RegexpRowInputFormat::stringToFormat(const St return ColumnFormat::Csv; if (format == "JSON") return ColumnFormat::Json; + if (format == "Raw") + return ColumnFormat::Raw; throw Exception("Unsupported column format \"" + format + "\".", ErrorCodes::BAD_ARGUMENTS); } @@ -88,6 +90,12 @@ bool RegexpRowInputFormat::readField(size_t index, MutableColumns & columns) else type->deserializeAsTextJSON(*columns[index], field_buf, format_settings); break; + case ColumnFormat::Raw: + if (parse_as_nullable) + read = DataTypeNullable::deserializeWholeText(*columns[index], field_buf, format_settings, type); + else + type->deserializeAsWholeText(*columns[index], field_buf, format_settings); + break; default: break; } diff --git a/src/Processors/QueryPlan/IQueryPlanStep.cpp b/src/Processors/QueryPlan/IQueryPlanStep.cpp index 0be40019c58..71c4caaa795 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.cpp +++ b/src/Processors/QueryPlan/IQueryPlanStep.cpp @@ -45,7 +45,7 @@ static void doDescribeHeader(const Block & header, size_t count, IQueryPlanStep: settings.out << prefix; first = false; - elem.dumpStructure(settings.out); + elem.dumpNameAndType(settings.out); settings.out << '\n'; } } diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 74781f4b5d9..1ff844480a9 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -225,7 +225,7 @@ static void explainStep( settings.out << "\n" << prefix << " "; first = false; - elem.dumpStructure(settings.out); + elem.dumpNameAndType(settings.out); } } diff --git a/src/Processors/ya.make b/src/Processors/ya.make index c69d274a70e..b2f8b9ba7c2 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -47,6 +47,8 @@ SRCS( Formats/Impl/PrettySpaceBlockOutputFormat.cpp Formats/Impl/ProtobufRowInputFormat.cpp Formats/Impl/ProtobufRowOutputFormat.cpp + Formats/Impl/RawBLOBRowInputFormat.cpp + Formats/Impl/RawBLOBRowOutputFormat.cpp Formats/Impl/RegexpRowInputFormat.cpp Formats/Impl/TabSeparatedRowInputFormat.cpp Formats/Impl/TabSeparatedRowOutputFormat.cpp diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 6e4bc4dc80c..48cde6b6aa9 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -426,6 +426,16 @@ CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_ return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec()); } +ASTPtr ColumnsDescription::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + const auto it = columns.get<1>().find(column_name); + + if (it == columns.get<1>().end() || !it->codec) + return default_codec->getFullCodecDesc(); + + return it->codec; +} + ColumnsDescription::ColumnTTLs 
ColumnsDescription::getColumnTTLs() const { ColumnTTLs ret; diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 6e2d3299091..8d1009b7263 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -115,6 +115,7 @@ public: bool hasCompressionCodec(const String & column_name) const; CompressionCodecPtr getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; CompressionCodecPtr getCodecOrDefault(const String & column_name) const; + ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; String toString() const; static ColumnsDescription parse(const String & str); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 40500e78de1..4a2e70aa84b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -337,6 +337,8 @@ public: throw Exception("Truncate is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + virtual void checkTableCanBeRenamed() const {} + /** Rename the table. * Renaming a name in a file with metadata, the name in the list of tables in the RAM, is done separately. * In this function, you need to rename the directory with the data, if any. diff --git a/src/Storages/JoinSettings.cpp b/src/Storages/JoinSettings.cpp new file mode 100644 index 00000000000..15637d67dea --- /dev/null +++ b/src/Storages/JoinSettings.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(joinSettingsTraits, LIST_OF_JOIN_SETTINGS) + +void JoinSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); + else + e.rethrow(); + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} + +} diff --git a/src/Storages/JoinSettings.h b/src/Storages/JoinSettings.h new file mode 100644 index 00000000000..13be557a439 --- /dev/null +++ b/src/Storages/JoinSettings.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ASTStorage; + + +#define JOIN_RELATED_SETTINGS(M) \ + M(Bool, persistent, true, "Disable setting to avoid the overhead of writing to disk for StorageJoin", 0) + +#define LIST_OF_JOIN_SETTINGS(M) \ + JOIN_RELATED_SETTINGS(M) \ + FORMAT_FACTORY_SETTINGS(M) + +DECLARE_SETTINGS_TRAITS(joinSettingsTraits, LIST_OF_JOIN_SETTINGS) + + +/** Settings for the Join engine. + * Could be loaded from a CREATE TABLE query (SETTINGS clause). 
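 *
 * For example (illustrative; the Join engine arguments are whatever the table already uses):
 *     CREATE TABLE lookup (k UInt64, v String) ENGINE = Join(ANY, LEFT, k) SETTINGS persistent = 0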
+ */ +struct JoinSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index 4e98caf066a..e5cc6d6ecbf 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -51,7 +51,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( for (const NameAndTypePair & column : columns) { column.type->enumerateStreams( - [&](const IDataType::SubstreamPath & substream_path) + [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_path */) { ++stream_counts[IDataType::getFileNameForStream(column.name, substream_path)]; }, @@ -62,7 +62,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( const String mrk_extension = data_part->getMarksFileExtension(); for (const auto & column_name : empty_columns) { - IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path) + IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_path */) { String stream_name = IDataType::getFileNameForStream(column_name, substream_path); /// Delete files if they are no longer shared with another column. diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index 89715085eda..a5f7ebce84f 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -124,7 +124,8 @@ private: /// Watch for the node in front of us. --my_node_it; - if (!zookeeper.existsWatch(path + "/" + *my_node_it, nullptr, task->getWatchCallback())) + std::string get_path_value; + if (!zookeeper.tryGetWatch(path + "/" + *my_node_it, get_path_value, nullptr, task->getWatchCallback())) task->schedule(); success = true; diff --git a/src/Storages/MergeTree/MergeSelector.h b/src/Storages/MergeTree/MergeSelector.h index e460b8ae06a..fcdfcf5b890 100644 --- a/src/Storages/MergeTree/MergeSelector.h +++ b/src/Storages/MergeTree/MergeSelector.h @@ -44,7 +44,7 @@ public: /// Information about different TTLs for part. Can be used by /// TTLSelector to assign merges with TTL. - const MergeTreeDataPartTTLInfos * ttl_infos; + const MergeTreeDataPartTTLInfos * ttl_infos = nullptr; /// Part compression codec definition. ASTPtr compression_codec_desc; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 0fc5ec43048..1125eb32b66 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -717,6 +717,8 @@ protected: bool require_part_metadata; + /// Relative path data, changes during rename for ordinary databases use + /// under lockForShare if rename is possible. 
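    /// In other words, code that reads relative_data_path while a concurrent RENAME is
    /// possible should hold at least a share lock; see the ReplicatedMergeTreeCleanupThread
    /// change below, which moves clearOldWriteAheadLogs() under lockForShare for this reason.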
String relative_data_path; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 91a34efc2b5..73b3fe698cc 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1452,7 +1452,7 @@ NameToNameVector MergeTreeDataMergerMutator::collectFilesForRenames( for (const NameAndTypePair & column : source_part->getColumns()) { column.type->enumerateStreams( - [&](const IDataType::SubstreamPath & substream_path) + [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { ++stream_counts[IDataType::getFileNameForStream(column.name, substream_path)]; }, @@ -1470,7 +1470,7 @@ NameToNameVector MergeTreeDataMergerMutator::collectFilesForRenames( } else if (command.type == MutationCommand::Type::DROP_COLUMN) { - IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path) + IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { String stream_name = IDataType::getFileNameForStream(command.column_name, substream_path); /// Delete files if they are no longer shared with another column. @@ -1491,7 +1491,7 @@ NameToNameVector MergeTreeDataMergerMutator::collectFilesForRenames( String escaped_name_from = escapeForFileName(command.column_name); String escaped_name_to = escapeForFileName(command.rename_to); - IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path) + IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { String stream_from = IDataType::getFileNameForStream(command.column_name, substream_path); @@ -1524,7 +1524,7 @@ NameSet MergeTreeDataMergerMutator::collectFilesToSkip( /// Skip updated files for (const auto & entry : updated_header) { - IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path) + IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { String stream_name = IDataType::getFileNameForStream(entry.name, substream_path); files_to_skip.insert(stream_name + ".bin"); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index c53362c847d..39898a7cc57 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -77,7 +77,7 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl( if (checksums.empty()) return size; - type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path) + type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { String file_name = IDataType::getFileNameForStream(column_name, substream_path); @@ -155,7 +155,7 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const for (const NameAndTypePair & name_type : columns) { IDataType::SubstreamPath stream_path; - name_type.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path) + name_type.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { String file_name = IDataType::getFileNameForStream(name_type.name, substream_path); String mrk_file_name = file_name + index_granularity_info.marks_file_extension; @@ -177,7 +177,7 @@ void 
MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const std::optional marks_size; for (const NameAndTypePair & name_type : columns) { - name_type.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path) + name_type.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { auto file_path = path + IDataType::getFileNameForStream(name_type.name, substream_path) + index_granularity_info.marks_file_extension; @@ -205,7 +205,7 @@ bool MergeTreeDataPartWide::hasColumnFiles(const String & column_name, const IDa { bool res = true; - type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path) + type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { String file_name = IDataType::getFileNameForStream(column_name, substream_path); @@ -222,7 +222,7 @@ bool MergeTreeDataPartWide::hasColumnFiles(const String & column_name, const IDa String MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const { String filename; - column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path) + column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { if (filename.empty()) filename = IDataType::getFileNameForStream(column.name, substream_path); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 235c76e744b..c81894ee36d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -3,6 +3,7 @@ namespace DB { + MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const MergeTreeData::DataPartPtr & data_part_, const NamesAndTypesList & columns_list_, @@ -30,14 +31,37 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( { const auto & storage_columns = metadata_snapshot->getColumns(); for (const auto & column : columns_list) - { - auto codec = storage_columns.getCodecOrDefault(column.name, default_codec); - auto & stream = streams_by_codec[codec->getHash()]; - if (!stream) - stream = std::make_shared(plain_hashing, codec); + addStreams(column.name, *column.type, storage_columns.getCodecDescOrDefault(column.name, default_codec)); +} - compressed_streams.push_back(stream); - } +void MergeTreeDataPartWriterCompact::addStreams(const String & name, const IDataType & type, const ASTPtr & effective_codec_desc) +{ + IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & substream_type) + { + String stream_name = IDataType::getFileNameForStream(name, substream_path); + + /// Shared offsets for Nested type. 
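        /// (several subcolumns of one Nested column map to the same sizes stream,
        /// so an entry for this stream_name may already exist).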
+ if (compressed_streams.count(stream_name)) + return; + + CompressionCodecPtr compression_codec; + + /// If we can use special codec than just get it + if (IDataType::isSpecialCompressionAllowed(substream_path)) + compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, &substream_type, default_codec); + else /// otherwise return only generic codecs and don't use info about data_type + compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, nullptr, default_codec, true); + + UInt64 codec_id = compression_codec->getHash(); + auto & stream = streams_by_codec[codec_id]; + if (!stream) + stream = std::make_shared(plain_hashing, compression_codec); + + compressed_streams.emplace(stream_name, stream); + }; + + IDataType::SubstreamPath stream_path; + type.enumerateStreams(callback, stream_path); } void MergeTreeDataPartWriterCompact::write( @@ -110,18 +134,37 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block) auto name_and_type = columns_list.begin(); for (size_t i = 0; i < columns_list.size(); ++i, ++name_and_type) { - auto & stream = compressed_streams[i]; + /// Tricky part, because we share compressed streams between different columns substreams. + /// Compressed streams write data to the single file, but with different compression codecs. + /// So we flush each stream (using next()) before using new one, because otherwise we will override + /// data in result file. + CompressedStreamPtr prev_stream; + auto stream_getter = [&, this](const IDataType::SubstreamPath & substream_path) -> WriteBuffer * + { + String stream_name = IDataType::getFileNameForStream(name_and_type->name, substream_path); + + auto & result_stream = compressed_streams[stream_name]; + /// Write one compressed block per column in granule for more optimal reading. + if (prev_stream && prev_stream != result_stream) + { + /// Offset should be 0, because compressed block is written for every granule. + assert(result_stream->hashing_buf.offset() == 0); + prev_stream->hashing_buf.next(); + } + + prev_stream = result_stream; + + return &result_stream->hashing_buf; + }; - /// Offset should be 0, because compressed block is written for every granule. - assert(stream->hashing_buf.offset() == 0); writeIntBinary(plain_hashing.count(), marks); writeIntBinary(UInt64(0), marks); - writeColumnSingleGranule(block.getByName(name_and_type->name), stream, current_row, rows_to_write); + writeColumnSingleGranule(block.getByName(name_and_type->name), stream_getter, current_row, rows_to_write); - /// Write one compressed block per column in granule for more optimal reading. 
- stream->hashing_buf.next(); + /// Each type always have at least one substream + prev_stream->hashing_buf.next(); //-V522 } ++from_mark; @@ -145,13 +188,14 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block) void MergeTreeDataPartWriterCompact::writeColumnSingleGranule( const ColumnWithTypeAndName & column, - const CompressedStreamPtr & stream, - size_t from_row, size_t number_of_rows) + IDataType::OutputStreamGetter stream_getter, + size_t from_row, + size_t number_of_rows) { IDataType::SerializeBinaryBulkStatePtr state; IDataType::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.getter = [&stream](IDataType::SubstreamPath) -> WriteBuffer * { return &stream->hashing_buf; }; + serialize_settings.getter = stream_getter; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 4beb0dba340..a5a1a859e7a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -30,6 +30,8 @@ private: void addToChecksums(MergeTreeDataPartChecksums & checksums); + void addStreams(const String & name, const IDataType & type, const ASTPtr & effective_codec_desc); + Block header; /** Simplified SquashingTransform. The original one isn't suitable in this case @@ -52,22 +54,25 @@ private: std::unique_ptr plain_file; HashingWriteBuffer plain_hashing; + /// Compressed stream which allows to write with codec. struct CompressedStream { CompressedWriteBuffer compressed_buf; HashingWriteBuffer hashing_buf; CompressedStream(WriteBuffer & buf, const CompressionCodecPtr & codec) - : compressed_buf(buf, codec), hashing_buf(compressed_buf) {} + : compressed_buf(buf, codec) + , hashing_buf(compressed_buf) {} }; using CompressedStreamPtr = std::shared_ptr; - /// Create compressed stream for every different codec. + /// Create compressed stream for every different codec. All streams write to + /// a single file on disk. std::unordered_map streams_by_codec; - /// For better performance save pointer to stream by every column. - std::vector compressed_streams; + /// Stream for each column's substreams path (look at addStreams). 
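    /// For example, a Nullable(String) column "c" gets entries for "c" and "c.null";
    /// both may refer to the same CompressedStream when their codecs hash to the same value.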
+ std::unordered_map compressed_streams; /// marks -> marks_file std::unique_ptr marks_file; @@ -76,7 +81,7 @@ private: /// Write single granule of one column (rows between 2 marks) static void writeColumnSingleGranule( const ColumnWithTypeAndName & column, - const CompressedStreamPtr & stream, + IDataType::OutputStreamGetter stream_getter, size_t from_row, size_t number_of_rows); }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index a558c57b5f0..c15c39e7b7f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -28,28 +29,35 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( { const auto & columns = metadata_snapshot->getColumns(); for (const auto & it : columns_list) - addStreams(it.name, *it.type, columns.getCodecOrDefault(it.name, default_codec), settings.estimated_size); + addStreams(it.name, *it.type, columns.getCodecDescOrDefault(it.name, default_codec), settings.estimated_size); } void MergeTreeDataPartWriterWide::addStreams( const String & name, const IDataType & type, - const CompressionCodecPtr & effective_codec, + const ASTPtr & effective_codec_desc, size_t estimated_size) { - IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path) + IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & substream_type) { String stream_name = IDataType::getFileNameForStream(name, substream_path); /// Shared offsets for Nested type. if (column_streams.count(stream_name)) return; + CompressionCodecPtr compression_codec; + /// If we can use special codec then just get it + if (IDataType::isSpecialCompressionAllowed(substream_path)) + compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, &substream_type, default_codec); + else /// otherwise return only generic codecs and don't use info about the data_type + compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, nullptr, default_codec, true); + column_streams[stream_name] = std::make_unique( stream_name, data_part->volume->getDisk(), part_path + stream_name, DATA_FILE_EXTENSION, part_path + stream_name, marks_file_extension, - effective_codec, + compression_codec, settings.max_compress_block_size, estimated_size, settings.aio_threshold); @@ -130,7 +138,7 @@ void MergeTreeDataPartWriterWide::writeSingleMark( size_t number_of_rows, DB::IDataType::SubstreamPath & path) { - type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path) + type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes; @@ -170,7 +178,7 @@ size_t MergeTreeDataPartWriterWide::writeSingleGranule( type.serializeBinaryBulkWithMultipleStreams(column, from_row, number_of_rows, serialize_settings, serialization_state); /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one. 
- type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path) + type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes; @@ -251,7 +259,7 @@ void MergeTreeDataPartWriterWide::writeColumn( current_column_mark++; } - type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path) + type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes; if (is_offsets) @@ -312,7 +320,7 @@ void MergeTreeDataPartWriterWide::writeFinalMark( { writeSingleMark(column_name, *column_type, offset_columns, 0, path); /// Memoize information about offsets - column_type->enumerateStreams([&] (const IDataType::SubstreamPath & substream_path) + column_type->enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes; if (is_offsets) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 02ab2a7ca56..6dd3104c5b4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -66,7 +66,7 @@ private: void addStreams( const String & name, const IDataType & type, - const CompressionCodecPtr & effective_codec, + const ASTPtr & effective_codec_desc, size_t estimated_size); SerializationStates serialization_states; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2b8b886daaf..8b5b337bcec 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -223,7 +223,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( if (part_column_queried) VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context); - std::multiset part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); + auto part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); metadata_snapshot->check(real_column_names, data.getVirtuals(), data.getStorageID()); diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 352d1f93589..1dacdacbae0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -162,7 +162,7 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si void MergeTreeReaderWide::addStreams(const String & name, const IDataType & type, const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type) { - IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path) + IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { String stream_name = IDataType::getFileNameForStream(name, substream_path); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index a5216e6fda3..11f23a5c110 100644 --- 
a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -56,10 +56,12 @@ void ReplicatedMergeTreeCleanupThread::run() void ReplicatedMergeTreeCleanupThread::iterate() { storage.clearOldPartsAndRemoveFromZK(); - storage.clearOldWriteAheadLogs(); { auto lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); + /// Both use relative_data_path which changes during rename, so we + /// do it under share lock + storage.clearOldWriteAheadLogs(); storage.clearOldTemporaryDirectories(); } diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 63b061b9702..2838c8eb881 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -120,7 +120,7 @@ IMergeTreeDataPart::Checksums checkDataPart( { for (const auto & column : columns_list) { - column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path) + column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { String file_name = IDataType::getFileNameForStream(column.name, substream_path) + ".bin"; checksums_data.files[file_name] = checksum_compressed_file(disk, path + file_name); diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 1188fd2edd3..f94425a81d3 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -412,26 +412,31 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// For Replicated. String zookeeper_path; String replica_name; + bool allow_renaming = true; if (replicated) { bool has_arguments = arg_num + 2 <= arg_cnt; bool has_valid_arguments = has_arguments && engine_args[arg_num]->as() && engine_args[arg_num + 1]->as(); + ASTLiteral * ast_zk_path; + ASTLiteral * ast_replica_name; + if (has_valid_arguments) { - const auto * ast = engine_args[arg_num]->as(); - if (ast && ast->value.getType() == Field::Types::String) - zookeeper_path = safeGet(ast->value); + /// Get path and name from engine arguments + ast_zk_path = engine_args[arg_num]->as(); + if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) + zookeeper_path = safeGet(ast_zk_path->value); else throw Exception( "Path in ZooKeeper must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); ++arg_num; - ast = engine_args[arg_num]->as(); - if (ast && ast->value.getType() == Field::Types::String) - replica_name = safeGet(ast->value); + ast_replica_name = engine_args[arg_num]->as(); + if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) + replica_name = safeGet(ast_replica_name->value); else throw Exception( "Replica name must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); @@ -441,12 +446,27 @@ static StoragePtr create(const StorageFactory::Arguments & args) "No replica name in config" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::NO_REPLICA_NAME_GIVEN); ++arg_num; } - else if (is_extended_storage_def && !has_arguments) + else if (is_extended_storage_def && arg_cnt == 0) { /// Try use default values if arguments are not specified. - /// It works for ON CLUSTER queries when database engine is Atomic and there are {shard} and {replica} in config. 
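The ReplicatedMergeTreeCleanupThread change above moves clearOldWriteAheadLogs() under the share lock because both WAL cleanup and RENAME go through relative_data_path, which changes mid-rename. A rough reproduction sketch of the race this guards against, not part of the patch; it assumes the in-memory-part thresholds of this release so that small inserts actually land in the write-ahead log, and all names are illustrative:

    # Sketch only: keep renaming while small inserts feed the WAL, so background
    # cleanup runs concurrently with the path change.
    def rename_during_wal_cleanup(node):
        node.query("""
            CREATE TABLE wal_a (n UInt64) ENGINE = MergeTree ORDER BY n
            SETTINGS min_rows_for_compact_part = 1000, min_bytes_for_compact_part = 100000
        """)
        for i in range(100):
            node.query("INSERT INTO wal_a VALUES ({})".format(i))  # small insert -> in-memory part -> WAL
            node.query("RENAME TABLE wal_a TO wal_b")
            node.query("RENAME TABLE wal_b TO wal_a")
        assert node.query("SELECT count() FROM wal_a") == "100\n"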
- zookeeper_path = "/clickhouse/tables/{uuid}/{shard}"; - replica_name = "{replica}"; /// TODO maybe use hostname if {replica} is not defined? + /// Note: {uuid} macro works for ON CLUSTER queries when database engine is Atomic. + zookeeper_path = args.context.getConfigRef().getString("default_replica_path", "/clickhouse/tables/{uuid}/{shard}"); + /// TODO maybe use hostname if {replica} is not defined? + replica_name = args.context.getConfigRef().getString("default_replica_name", "{replica}"); + + /// Modify query, so default values will be written to metadata + assert(arg_num == 0); + ASTs old_args; + std::swap(engine_args, old_args); + auto path_arg = std::make_shared(zookeeper_path); + auto name_arg = std::make_shared(replica_name); + ast_zk_path = path_arg.get(); + ast_replica_name = name_arg.get(); + engine_args.emplace_back(std::move(path_arg)); + engine_args.emplace_back(std::move(name_arg)); + std::move(std::begin(old_args), std::end(old_args), std::back_inserter(engine_args)); + arg_num = 2; + arg_cnt += 2; } else throw Exception("Expected two string literal arguments: zookeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS); @@ -454,8 +474,44 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries bool is_on_cluster = args.local_context.getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; bool allow_uuid_macro = is_on_cluster || args.query.attach; - zookeeper_path = args.context.getMacros()->expand(zookeeper_path, args.table_id, allow_uuid_macro); - replica_name = args.context.getMacros()->expand(replica_name, args.table_id, false); + + /// Unfold {database} and {table} macro on table creation, so table can be renamed. + /// We also unfold {uuid} macro, so path will not be broken after moving table from Atomic to Ordinary database. + if (!args.attach) + { + Macros::MacroExpansionInfo info; + /// NOTE: it's not recursive + info.expand_special_macros_only = true; + info.table_id = args.table_id; + if (!allow_uuid_macro) + info.table_id.uuid = UUIDHelpers::Nil; + zookeeper_path = args.context.getMacros()->expand(zookeeper_path, info); + + info.level = 0; + info.table_id.uuid = UUIDHelpers::Nil; + replica_name = args.context.getMacros()->expand(replica_name, info); + } + + ast_zk_path->value = zookeeper_path; + ast_replica_name->value = replica_name; + + /// Expand other macros (such as {shard} and {replica}). We do not expand them on previous step + /// to make possible copying metadata files between replicas. + Macros::MacroExpansionInfo info; + info.table_id = args.table_id; + if (!allow_uuid_macro) + info.table_id.uuid = UUIDHelpers::Nil; + zookeeper_path = args.context.getMacros()->expand(zookeeper_path, info); + + info.level = 0; + info.table_id.uuid = UUIDHelpers::Nil; + replica_name = args.context.getMacros()->expand(replica_name, info); + + /// We do not allow renaming table with these macros in metadata, because zookeeper_path will be broken after RENAME TABLE. + /// NOTE: it may happen if table was created by older version of ClickHouse (< 20.10) and macros was not unfolded on table creation + /// or if one of these macros is recursively expanded from some other macro. 
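With the branch above, a Replicated*MergeTree created without engine arguments takes its path and replica name from the default_replica_path / default_replica_name server settings, and the {database}, {table} and (where allowed) {uuid} macros are unfolded into the stored metadata at CREATE time, so the table can later be renamed. A usage sketch assuming the cluster and Atomic database setup of the test_distributed_ddl suite touched further down; the names come from that test and are illustrative:

    # Sketch only: after CREATE, the metadata already contains the unfolded ZooKeeper path;
    # {shard} and {replica} stay as macros so metadata can be copied between replicas.
    def check_unfolded_defaults(instance):
        instance.query("CREATE TABLE test_atomic.rmt ON CLUSTER cluster (n UInt64) "
                       "ENGINE = ReplicatedMergeTree() ORDER BY n")
        create = instance.query("SHOW CREATE TABLE test_atomic.rmt FORMAT TSVRaw")
        assert "ReplicatedMergeTree('/clickhouse/tables/" in create
        assert "{uuid}" not in create
        assert "{shard}" in create and "{replica}" in create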
+ if (info.expanded_database || info.expanded_table) + allow_renaming = false; } /// This merging param maybe used as part of sorting key @@ -706,7 +762,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) date_column_name, merging_params, std::move(storage_settings), - args.has_force_restore_data_flag); + args.has_force_restore_data_flag, + allow_renaming); else return StorageMergeTree::create( args.table_id, diff --git a/src/Storages/SetSettings.cpp b/src/Storages/SetSettings.cpp new file mode 100644 index 00000000000..f7ff1c446f2 --- /dev/null +++ b/src/Storages/SetSettings.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(setSettingsTraits, LIST_OF_SET_SETTINGS) + +void SetSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); + else + e.rethrow(); + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} + +} diff --git a/src/Storages/SetSettings.h b/src/Storages/SetSettings.h new file mode 100644 index 00000000000..c5926e409c2 --- /dev/null +++ b/src/Storages/SetSettings.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ASTStorage; + + +#define SET_RELATED_SETTINGS(M) \ + M(Bool, persistent, true, "Disable setting to avoid the overhead of writing to disk for StorageSet", 0) + +#define LIST_OF_SET_SETTINGS(M) \ + SET_RELATED_SETTINGS(M) \ + FORMAT_FACTORY_SETTINGS(M) + +DECLARE_SETTINGS_TRAITS(setSettingsTraits, LIST_OF_SET_SETTINGS) + + +/** Settings for the Set engine. + * Could be loaded from a CREATE TABLE query (SETTINGS clause). 
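The persistent setting declared here is wired into the Set and Join storages further down in this patch: when it is disabled, SetOrJoinBlockOutputStream keeps the data in memory and skips writing the backup .bin files, so the contents do not survive a restart. A usage sketch with illustrative table names:

    # Sketch only: non-persistent Set and Join tables still serve IN / joinGet lookups,
    # they just never write their backup files to disk.
    def create_non_persistent_set_and_join(node):
        node.query("CREATE TABLE lookup_set (k UInt64) ENGINE = Set SETTINGS persistent = 0")
        node.query("CREATE TABLE lookup_join (k UInt64, v String) "
                   "ENGINE = Join(ANY, LEFT, k) SETTINGS persistent = 0")
        node.query("INSERT INTO lookup_set VALUES (1), (2)")
        node.query("INSERT INTO lookup_join VALUES (1, 'a')")
        assert node.query("SELECT toUInt64(1) IN lookup_set") == "1\n"
        assert node.query("SELECT joinGet('lookup_join', 'v', toUInt64(1))") == "a\n"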
+ */ +struct SetSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index 0a8ceb4b8e5..eda9f36010f 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -180,7 +180,16 @@ StoragePtr StorageFactory::get( .has_force_restore_data_flag = has_force_restore_data_flag }; - return storages.at(name).creator_fn(arguments); + auto res = storages.at(name).creator_fn(arguments); + if (!empty_engine_args.empty()) + { + /// Storage creator modified empty arguments list, so we should modify the query + assert(storage_def && storage_def->engine && !storage_def->engine->arguments); + storage_def->engine->arguments = std::make_shared(); + storage_def->engine->children.push_back(storage_def->engine->arguments); + storage_def->engine->arguments->children = empty_engine_args; + } + return res; } StorageFactory & StorageFactory::instance() diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 4e3fe01e858..33c67229cfa 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -44,8 +44,9 @@ StorageJoin::StorageJoin( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, bool overwrite_, - const Context & context_) - : StorageSetOrJoinBase{relative_path_, table_id_, columns_, constraints_, context_} + const Context & context_, + bool persistent_) + : StorageSetOrJoinBase{relative_path_, table_id_, columns_, constraints_, context_, persistent_} , key_names(key_names_) , use_nulls(use_nulls_) , limits(limits_) @@ -118,6 +119,7 @@ void registerStorageJoin(StorageFactory & factory) auto join_overflow_mode = settings.join_overflow_mode; auto join_any_take_last_row = settings.join_any_take_last_row; auto old_any_join = settings.any_join_distinct_right_table_keys; + bool persistent = true; if (args.storage_def && args.storage_def->settings) { @@ -135,6 +137,12 @@ void registerStorageJoin(StorageFactory & factory) join_any_take_last_row = setting.value; else if (setting.name == "any_join_distinct_right_table_keys") old_any_join = setting.value; + else if (setting.name == "persistent") + { + auto join_settings = std::make_unique(); + join_settings->loadFromQuery(*args.storage_def); + persistent = join_settings->persistent; + } else throw Exception( "Unknown setting " + setting.name + " for storage " + args.engine_name, @@ -217,7 +225,8 @@ void registerStorageJoin(StorageFactory & factory) args.columns, args.constraints, join_any_take_last_row, - args.context); + args.context, + persistent); }; factory.registerStorage("Join", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 386282708c9..95037c4d33a 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -3,6 +3,7 @@ #include #include +#include #include @@ -72,7 +73,8 @@ protected: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, bool overwrite, - const Context & context_); + const Context & context_, + bool persistent_); }; } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index e437bfb05f1..944dc0e5804 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -362,7 +362,7 @@ void LogBlockOutputStream::writeData(const String & name, const IDataType & type { IDataType::SerializeBinaryBulkSettings settings; - type.enumerateStreams([&] (const 
IDataType::SubstreamPath & path) + type.enumerateStreams([&] (const IDataType::SubstreamPath & path, const IDataType & /* substream_type */) { String stream_name = IDataType::getFileNameForStream(name, path); if (written_streams.count(stream_name)) @@ -382,7 +382,7 @@ void LogBlockOutputStream::writeData(const String & name, const IDataType & type if (serialize_states.count(name) == 0) type.serializeBinaryBulkStatePrefix(settings, serialize_states[name]); - type.enumerateStreams([&] (const IDataType::SubstreamPath & path) + type.enumerateStreams([&] (const IDataType::SubstreamPath & path, const IDataType & /* substream_type */) { String stream_name = IDataType::getFileNameForStream(name, path); if (written_streams.count(stream_name)) @@ -400,7 +400,7 @@ void LogBlockOutputStream::writeData(const String & name, const IDataType & type type.serializeBinaryBulkWithMultipleStreams(column, 0, 0, settings, serialize_states[name]); - type.enumerateStreams([&] (const IDataType::SubstreamPath & path) + type.enumerateStreams([&] (const IDataType::SubstreamPath & path, const IDataType & /* substream_type */) { String stream_name = IDataType::getFileNameForStream(name, path); if (!written_streams.emplace(stream_name).second) @@ -487,7 +487,7 @@ void StorageLog::addFiles(const String & column_name, const IDataType & type) throw Exception("Duplicate column with name " + column_name + " in constructor of StorageLog.", ErrorCodes::DUPLICATE_COLUMN); - IDataType::StreamCallback stream_callback = [&] (const IDataType::SubstreamPath & substream_path) + IDataType::StreamCallback stream_callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { String stream_name = IDataType::getFileNameForStream(column_name, substream_path); @@ -597,7 +597,7 @@ const StorageLog::Marks & StorageLog::getMarksWithRealRowCount(const StorageMeta * (Example: for Array data type, first stream is array sizes; and number of array sizes is the number of arrays). */ IDataType::SubstreamPath substream_root_path; - column_type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path) + column_type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { if (filename.empty()) filename = IDataType::getFileNameForStream(column_name, substream_path); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 42ac9cb6a5b..bade0810320 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -41,19 +43,42 @@ namespace { /// Rewrite original query removing joined tables from it -void removeJoin(const ASTSelectQuery & select) +bool removeJoin(ASTSelectQuery & select) { const auto & tables = select.tables(); if (!tables || tables->children.size() < 2) - return; + return false; const auto & joined_table = tables->children[1]->as(); if (!joined_table.table_join) - return; + return false; /// The most simple temporary solution: leave only the first table in query. /// TODO: we also need to remove joined columns and related functions (taking in account aliases if any). tables->children.resize(1); + return true; +} + +void modifySelect(ASTSelectQuery & select, const TreeRewriterResult & rewriter_result) +{ + if (removeJoin(select)) + { + /// Also remove GROUP BY because ExpressionAnalyzer would check if it has all aggregate columns but joined columns would be missed.
+ select.setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); + + /// Replace select list to remove joined columns + auto select_list = std::make_shared(); + for (const auto & column : rewriter_result.required_source_columns) + select_list->children.emplace_back(std::make_shared(column.name)); + + select.setExpression(ASTSelectQuery::Expression::SELECT, select_list); + + /// TODO: keep WHERE/PREWHERE. We have to remove joined columns and their expressions but keep others. + select.setExpression(ASTSelectQuery::Expression::WHERE, {}); + select.setExpression(ASTSelectQuery::Expression::PREWHERE, {}); + select.setExpression(ASTSelectQuery::Expression::HAVING, {}); + select.setExpression(ASTSelectQuery::Expression::ORDER_BY, {}); + } } } @@ -265,7 +290,8 @@ Pipe StorageMerge::createSources( modified_query_info.query = query_info.query->clone(); /// Original query could contain JOIN but we need only the first joined table and its columns. - removeJoin(*modified_query_info.query->as()); + auto & modified_select = modified_query_info.query->as(); + modifySelect(modified_select, *query_info.syntax_analyzer_result); VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", table_name); @@ -282,7 +308,7 @@ Pipe StorageMerge::createSources( return pipe; } - auto storage_stage = storage->getQueryProcessingStage(*modified_context, QueryProcessingStage::Complete, query_info.query); + auto storage_stage = storage->getQueryProcessingStage(*modified_context, QueryProcessingStage::Complete, modified_query_info.query); if (processed_stage <= storage_stage) { /// If there are only virtual columns in query, you must request at least one other column. @@ -294,7 +320,7 @@ Pipe StorageMerge::createSources( } else if (processed_stage > storage_stage) { - modified_query_info.query->as()->replaceDatabaseAndTable(source_database, table_name); + modified_select.replaceDatabaseAndTable(source_database, table_name); /// Maximum permissible parallelism is streams_num modified_context->setSetting("max_threads", streams_num); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 347474753dc..55fb42b550e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -919,11 +919,13 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::mergeMutateTask() { { auto share_lock = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + /// All use relative_data_path which changes during rename + /// so execute under share lock. 
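For the modifySelect() rewrite in StorageMerge.cpp above: when the outer query joins a Merge table with another table, the query pushed down to each underlying table is now reduced to a plain SELECT of the required source columns (JOIN, GROUP BY, WHERE/PREWHERE, HAVING and ORDER BY are stripped), and the join itself runs at the outer stage. A behavioural sketch with illustrative names, not part of the patch:

    # Sketch only: the JOIN is not re-executed inside each source table of the Merge storage;
    # every source only contributes its required columns.
    def check_merge_with_join(node):
        node.query("CREATE TABLE t1 (id UInt64, v String) ENGINE = MergeTree ORDER BY id")
        node.query("CREATE TABLE t2 (id UInt64, v String) ENGINE = MergeTree ORDER BY id")
        node.query("CREATE TABLE t_all AS t1 ENGINE = Merge(currentDatabase(), '^t[12]$')")
        node.query("CREATE TABLE dim (id UInt64, name String) ENGINE = MergeTree ORDER BY id")
        node.query("INSERT INTO t1 VALUES (1, 'a')")
        node.query("INSERT INTO t2 VALUES (2, 'b')")
        node.query("INSERT INTO dim VALUES (1, 'one'), (2, 'two')")
        assert node.query("SELECT name FROM t_all INNER JOIN dim USING id ORDER BY name") == "one\ntwo\n"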
clearOldPartsFromFilesystem(); clearOldTemporaryDirectories(); + clearOldWriteAheadLogs(); } clearOldMutations(); - clearOldWriteAheadLogs(); } ///TODO: read deduplicate option from table config diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index b2e622663c0..afbca0d9430 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -96,7 +96,7 @@ Pipe StorageMySQL::read( /// TODO: rewrite MySQLBlockInputStream return Pipe(std::make_shared( - std::make_shared(pool.get(), query, sample_block, max_block_size_))); + std::make_shared(pool, query, sample_block, max_block_size_, /* auto_close = */ true))); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 65c0c5ac313..9613bd5111d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -178,7 +178,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const String & date_column_name, const MergingParams & merging_params_, std::unique_ptr settings_, - bool has_force_restore_data_flag) + bool has_force_restore_data_flag, + bool allow_renaming_) : MergeTreeData(table_id_, relative_data_path_, metadata_, @@ -200,6 +201,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , cleanup_thread(*this) , part_check_thread(*this) , restarting_thread(*this) + , allow_renaming(allow_renaming_) { queue_updating_task = global_context.getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::queueUpdatingTask)", [this]{ queueUpdatingTask(); }); @@ -4187,8 +4189,17 @@ void StorageReplicatedMergeTree::checkTableCanBeDropped() const global_context.checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes()); } +void StorageReplicatedMergeTree::checkTableCanBeRenamed() const +{ + if (!allow_renaming) + throw Exception("Cannot rename Replicated table, because zookeeper_path contains implicit 'database' or 'table' macro. " + "We cannot rename path in ZooKeeper, so path may become inconsistent with table name. 
If you really want to rename table, " + "you should edit metadata file first and restart server or reattach the table.", ErrorCodes::NOT_IMPLEMENTED); +} + void StorageReplicatedMergeTree::rename(const String & new_path_to_table_data, const StorageID & new_table_id) { + checkTableCanBeRenamed(); MergeTreeData::rename(new_path_to_table_data, new_table_id); /// Update table name in zookeeper diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 2bc9265331d..d851082d5c2 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -128,6 +128,8 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override; + void checkTableCanBeRenamed() const override; + void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; bool supportsIndexForIn() const override { return true; } @@ -304,6 +306,9 @@ private: /// True if replica was created for existing table with fixed granularity bool other_replicas_fixed_granularity = false; + /// Do not allow RENAME TABLE if zookeeper_path contains {database} or {table} macro + const bool allow_renaming; + template void foreachCommittedParts(const Func & func) const; @@ -571,7 +576,8 @@ protected: const String & date_column_name, const MergingParams & merging_params_, std::unique_ptr settings_, - bool has_force_restore_data_flag); + bool has_force_restore_data_flag, + bool allow_renaming_); }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 6d17a17409f..469350d6eaa 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -197,7 +197,7 @@ StorageS3::StorageS3( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, Context & context_, - const String & compression_method_ = "") + const String & compression_method_) : IStorage(table_id_) , uri(uri_) , context_global(context_) @@ -349,8 +349,6 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) Poco::URI uri (url); S3::URI s3_uri (uri); - String format_name = engine_args[engine_args.size() - 1]->as().value.safeGet(); - String access_key_id; String secret_access_key; if (engine_args.size() >= 4) @@ -362,12 +360,30 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) UInt64 min_upload_part_size = args.local_context.getSettingsRef().s3_min_upload_part_size; String compression_method; + String format_name; if (engine_args.size() == 3 || engine_args.size() == 5) + { compression_method = engine_args.back()->as().value.safeGet(); + format_name = engine_args[engine_args.size() - 2]->as().value.safeGet(); + } else + { compression_method = "auto"; + format_name = engine_args.back()->as().value.safeGet(); + } - return StorageS3::create(s3_uri, access_key_id, secret_access_key, args.table_id, format_name, min_upload_part_size, args.columns, args.constraints, args.context); + return StorageS3::create( + s3_uri, + access_key_id, + secret_access_key, + args.table_id, + format_name, + min_upload_part_size, + args.columns, + args.constraints, + args.context, + compression_method + ); }, { .source_access_type = AccessType::S3, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index a172d951642..5a702aa8785 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -34,7 +34,7 @@ public: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, Context & context_, - const String & 
compression_method_); + const String & compression_method_ = ""); String getName() const override { diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index f2946afbbfd..d6d8b9e1449 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include namespace DB @@ -35,7 +37,7 @@ public: SetOrJoinBlockOutputStream( StorageSetOrJoinBase & table_, const StorageMetadataPtr & metadata_snapshot_, const String & backup_path_, const String & backup_tmp_path_, - const String & backup_file_name_); + const String & backup_file_name_, bool persistent_); Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } void write(const Block & block) override; @@ -50,6 +52,7 @@ private: WriteBufferFromFile backup_buf; CompressedWriteBuffer compressed_backup_buf; NativeBlockOutputStream backup_stream; + bool persistent; }; @@ -58,7 +61,8 @@ SetOrJoinBlockOutputStream::SetOrJoinBlockOutputStream( const StorageMetadataPtr & metadata_snapshot_, const String & backup_path_, const String & backup_tmp_path_, - const String & backup_file_name_) + const String & backup_file_name_, + bool persistent_) : table(table_) , metadata_snapshot(metadata_snapshot_) , backup_path(backup_path_) @@ -67,6 +71,7 @@ SetOrJoinBlockOutputStream::SetOrJoinBlockOutputStream( , backup_buf(backup_tmp_path + backup_file_name) , compressed_backup_buf(backup_buf) , backup_stream(compressed_backup_buf, 0, metadata_snapshot->getSampleBlock()) + , persistent(persistent_) { } @@ -76,24 +81,28 @@ void SetOrJoinBlockOutputStream::write(const Block & block) Block sorted_block = block.sortColumns(); table.insertBlock(sorted_block); - backup_stream.write(sorted_block); + if (persistent) + backup_stream.write(sorted_block); } void SetOrJoinBlockOutputStream::writeSuffix() { table.finishInsert(); - backup_stream.flush(); - compressed_backup_buf.next(); - backup_buf.next(); + if (persistent) + { + backup_stream.flush(); + compressed_backup_buf.next(); + backup_buf.next(); - Poco::File(backup_tmp_path + backup_file_name).renameTo(backup_path + backup_file_name); + Poco::File(backup_tmp_path + backup_file_name).renameTo(backup_path + backup_file_name); + } } BlockOutputStreamPtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) { UInt64 id = ++increment; - return std::make_shared(*this, metadata_snapshot, path, path + "tmp/", toString(id) + ".bin"); + return std::make_shared(*this, metadata_snapshot, path, path + "tmp/", toString(id) + ".bin", persistent); } @@ -102,8 +111,10 @@ StorageSetOrJoinBase::StorageSetOrJoinBase( const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const Context & context_) - : IStorage(table_id_) + const Context & context_, + bool persistent_) + : IStorage(table_id_), + persistent(persistent_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); @@ -124,8 +135,9 @@ StorageSet::StorageSet( const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const Context & context_) - : StorageSetOrJoinBase{relative_path_, table_id_, columns_, constraints_, context_}, + const Context & context_, + bool persistent_) + : StorageSetOrJoinBase{relative_path_, table_id_, columns_, constraints_, context_, persistent_}, set(std::make_shared(SizeLimits(), false, true)) { @@ -229,8 +241,16 @@ void 
registerStorageSet(StorageFactory & factory) "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - return StorageSet::create(args.relative_data_path, args.table_id, args.columns, args.constraints, args.context); - }); + bool has_settings = args.storage_def->settings; + + auto set_settings = std::make_unique(); + if (has_settings) + { + set_settings->loadFromQuery(*args.storage_def); + } + + return StorageSet::create(args.relative_data_path, args.table_id, args.columns, args.constraints, args.context, set_settings->persistent); + }, StorageFactory::StorageFeatures{ .supports_settings = true, }); } diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index de7c65bbc3e..40d7925de13 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB @@ -31,10 +32,12 @@ protected: const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const Context & context_); + const Context & context_, + bool persistent_); String base_path; String path; + bool persistent; std::atomic increment = 0; /// For the backup file names. @@ -82,7 +85,8 @@ protected: const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const Context & context_); + const Context & context_, + bool persistent_); }; } diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 0bdcab8abf4..a59480f0a0d 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -390,7 +390,7 @@ void StorageTinyLog::addFiles(const String & column_name, const IDataType & type throw Exception("Duplicate column with name " + column_name + " in constructor of StorageTinyLog.", ErrorCodes::DUPLICATE_COLUMN); - IDataType::StreamCallback stream_callback = [&] (const IDataType::SubstreamPath & substream_path) + IDataType::StreamCallback stream_callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) { String stream_name = IDataType::getFileNameForStream(column_name, substream_path); if (!files.count(stream_name)) diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 89b69eb79e3..445a996ab87 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -30,9 +30,9 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c /// Extract from the input stream a set of `name` column values template -std::multiset extractSingleValueFromBlock(const Block & block, const String & name) +auto extractSingleValueFromBlock(const Block & block, const String & name) { - std::multiset res; + std::unordered_set res; const ColumnWithTypeAndName & data = block.getByName(name); size_t rows = block.rows(); for (size_t i = 0; i < rows; ++i) diff --git a/src/Storages/ya.make b/src/Storages/ya.make index bbc5acdfa7a..107433b5e73 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -20,6 +20,7 @@ SRCS( getStructureOfRemoteTable.cpp IndicesDescription.cpp IStorage.cpp + JoinSettings.cpp KeyDescription.cpp LiveView/StorageLiveView.cpp LiveView/TemporaryLiveViewCleaner.cpp @@ -108,6 +109,7 @@ SRCS( ReadInOrderOptimizer.cpp registerStorages.cpp SelectQueryDescription.cpp + SetSettings.cpp StorageBuffer.cpp StorageDictionary.cpp 
StorageDistributed.cpp diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index 220d8d801ec..7626ac5a904 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -237,7 +237,7 @@ "with_coverage": false } }, - "Functional stateful tests (release, DatabaseAtomic)": { + "Functional stateful tests (release, DatabaseOrdinary)": { "required_build_properties": { "compiler": "gcc-10", "package_type": "deb", @@ -345,7 +345,7 @@ "with_coverage": false } }, - "Functional stateless tests (release, DatabaseAtomic)": { + "Functional stateless tests (release, DatabaseOrdinary)": { "required_build_properties": { "compiler": "gcc-10", "package_type": "deb", @@ -441,6 +441,18 @@ "with_coverage": false } }, + "Integration tests flaky check (asan)": { + "required_build_properties": { + "compiler": "clang-11", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, "Compatibility check": { "required_build_properties": { "compiler": "gcc-10", @@ -560,6 +572,18 @@ "clang-tidy": "disable", "with_coverage": false } + }, + "Release": { + "required_build_properties": { + "compiler": "gcc-10", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } } } } diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 2a1dbcb3cfd..bf17443b0b3 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -303,6 +303,12 @@ def run_tests_array(all_tests_with_params): clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE) clickhouse_proc.communicate("SELECT 'Running test {suite}/{case} from pid={pid}';".format(pid = os.getpid(), case = case, suite = suite)) + if clickhouse_proc.returncode != 0: + failures += 1 + print("Server does not respond to health check") + SERVER_DIED = True + break + reference_file = os.path.join(suite_dir, name) + '.reference' stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout' stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr' diff --git a/tests/config/config.d/macros.xml b/tests/config/config.d/macros.xml index 97c3065471f..4902b12bc81 100644 --- a/tests/config/config.d/macros.xml +++ b/tests/config/config.d/macros.xml @@ -3,5 +3,7 @@ Hello, world! 
s1 r1 + /clickhouse/tables/{database}/{shard}/ + table_{table} diff --git a/tests/config/install.sh b/tests/config/install.sh index 0f33854ef95..ef9604904e7 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -15,40 +15,40 @@ mkdir -p $DEST_SERVER_PATH/config.d/ mkdir -p $DEST_SERVER_PATH/users.d/ mkdir -p $DEST_CLIENT_PATH -ln -s $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/listen.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/part_log.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/text_log.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/metric_log.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/custom_settings_prefixes.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/macros.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/disks.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ -ln -s $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ -ln -s $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/ -ln -s $SRC_PATH/users.d/access_management.xml $DEST_SERVER_PATH/users.d/ +ln -sf $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/listen.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/part_log.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/text_log.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/metric_log.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/custom_settings_prefixes.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/macros.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/disks.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ +ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/ +ln -sf $SRC_PATH/users.d/access_management.xml $DEST_SERVER_PATH/users.d/ -ln -s $SRC_PATH/ints_dictionary.xml $DEST_SERVER_PATH/ -ln -s $SRC_PATH/strings_dictionary.xml $DEST_SERVER_PATH/ -ln -s $SRC_PATH/decimals_dictionary.xml $DEST_SERVER_PATH/ -ln -s $SRC_PATH/executable_dictionary.xml $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/ints_dictionary.xml $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/strings_dictionary.xml $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/decimals_dictionary.xml $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/executable_dictionary.xml $DEST_SERVER_PATH/ -ln -s $SRC_PATH/server.key $DEST_SERVER_PATH/ -ln -s $SRC_PATH/server.crt $DEST_SERVER_PATH/ -ln -s $SRC_PATH/dhparam.pem $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/server.key $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/server.crt $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/dhparam.pem $DEST_SERVER_PATH/ # Retain any pre-existing config and allow ClickHouse to load it if required -ln -s --backup=simple --suffix=_original.xml \ +ln -sf --backup=simple --suffix=_original.xml \ $SRC_PATH/config.d/query_masking_rules.xml $DEST_SERVER_PATH/config.d/ if [[ -n "$USE_POLYMORPHIC_PARTS" ]] && [[ "$USE_POLYMORPHIC_PARTS" -eq 1 ]]; then - ln -s 
$SRC_PATH/config.d/polymorphic_parts.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/polymorphic_parts.xml $DEST_SERVER_PATH/config.d/ fi if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then - ln -s $SRC_PATH/users.d/database_ordinary.xml $DEST_SERVER_PATH/users.d/ + ln -sf $SRC_PATH/users.d/database_ordinary.xml $DEST_SERVER_PATH/users.d/ fi ln -sf $SRC_PATH/client_config.xml $DEST_CLIENT_PATH/config.xml diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 0b7fa9264bd..d99d8a17844 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -45,7 +45,6 @@ def _create_env_file(path, variables, fname=DEFAULT_ENV_NAME): f.write("=".join([var, value]) + "\n") return full_path - def subprocess_check_call(args): # Uncomment for debugging # print('run:', ' ' . join(args)) @@ -125,6 +124,7 @@ class ClickHouseCluster: self.base_zookeeper_cmd = None self.base_mysql_cmd = [] self.base_kafka_cmd = [] + self.base_kerberized_kafka_cmd = [] self.base_rabbitmq_cmd = [] self.base_cassandra_cmd = [] self.pre_zookeeper_commands = [] @@ -133,6 +133,7 @@ class ClickHouseCluster: self.with_mysql = False self.with_postgres = False self.with_kafka = False + self.with_kerberized_kafka = False self.with_rabbitmq = False self.with_odbc_drivers = False self.with_hdfs = False @@ -169,7 +170,7 @@ class ClickHouseCluster: def add_instance(self, name, base_config_dir=None, main_configs=None, user_configs=None, dictionaries=None, macros=None, - with_zookeeper=False, with_mysql=False, with_kafka=False, with_rabbitmq=False, + with_zookeeper=False, with_mysql=False, with_kafka=False, with_kerberized_kafka=False, with_rabbitmq=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, with_minio=False, with_cassandra=False, @@ -207,6 +208,7 @@ class ClickHouseCluster: zookeeper_config_path=self.zookeeper_config_path, with_mysql=with_mysql, with_kafka=with_kafka, + with_kerberized_kafka=with_kerberized_kafka, with_rabbitmq=with_rabbitmq, with_mongo=with_mongo, with_redis=with_redis, @@ -290,6 +292,13 @@ class ClickHouseCluster: p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')] cmds.append(self.base_kafka_cmd) + if with_kerberized_kafka and not self.with_kerberized_kafka: + self.with_kerberized_kafka = True + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')]) + self.base_kerberized_kafka_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')] + cmds.append(self.base_kerberized_kafka_cmd) + if with_rabbitmq and not self.with_rabbitmq: self.with_rabbitmq = True self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')]) @@ -608,6 +617,11 @@ class ClickHouseCluster: self.kafka_docker_id = self.get_instance_docker_id('kafka1') self.wait_schema_registry_to_start(120) + if self.with_kerberized_kafka and self.base_kerberized_kafka_cmd: + env = os.environ.copy() + env['KERBERIZED_KAFKA_DIR'] = instance.path + '/' + subprocess.check_call(self.base_kerberized_kafka_cmd + common_opts + ['--renew-anon-volumes'], env=env) + self.kerberized_kafka_docker_id = self.get_instance_docker_id('kerberized_kafka1') if self.with_rabbitmq and self.base_rabbitmq_cmd: subprocess_check_call(self.base_rabbitmq_cmd + common_opts + 
['--renew-anon-volumes']) self.rabbitmq_docker_id = self.get_instance_docker_id('rabbitmq1') @@ -788,9 +802,12 @@ services: - {instance_config_dir}:/etc/clickhouse-server/ - {db_dir}:/var/lib/clickhouse/ - {logs_dir}:/var/log/clickhouse-server/ + - /etc/passwd:/etc/passwd:ro {binary_volume} {odbc_bridge_volume} {odbc_ini_path} + {keytab_path} + {krb5_conf} entrypoint: {entrypoint_cmd} tmpfs: {tmpfs} cap_add: @@ -820,7 +837,7 @@ class ClickHouseInstance: def __init__( self, cluster, base_path, name, base_config_dir, custom_main_configs, custom_user_configs, custom_dictionaries, - macros, with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, + macros, with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_kerberized_kafka, with_rabbitmq, with_mongo, with_redis, with_minio, with_cassandra, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables=None, @@ -839,6 +856,7 @@ class ClickHouseInstance: self.custom_user_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_user_configs] self.custom_dictionaries_paths = [p.abspath(p.join(base_path, c)) for c in custom_dictionaries] self.clickhouse_path_dir = p.abspath(p.join(base_path, clickhouse_path_dir)) if clickhouse_path_dir else None + self.kerberos_secrets_dir = p.abspath(p.join(base_path, 'secrets')) self.macros = macros if macros is not None else {} self.with_zookeeper = with_zookeeper self.zookeeper_config_path = zookeeper_config_path @@ -848,6 +866,7 @@ class ClickHouseInstance: self.with_mysql = with_mysql self.with_kafka = with_kafka + self.with_kerberized_kafka = with_kerberized_kafka self.with_rabbitmq = with_rabbitmq self.with_mongo = with_mongo self.with_redis = with_redis @@ -863,6 +882,13 @@ class ClickHouseInstance: else: self.odbc_ini_path = "" + if with_kerberized_kafka: + self.keytab_path = '- ' + os.path.dirname(self.docker_compose_path) + "/secrets:/tmp/keytab" + self.krb5_conf = '- ' + os.path.dirname(self.docker_compose_path) + "/secrets/krb.conf:/etc/krb5.conf:ro" + else: + self.keytab_path = "" + self.krb5_conf = "" + self.docker_client = None self.ip_address = None self.client = None @@ -1192,6 +1218,9 @@ class ClickHouseInstance: if self.with_zookeeper: shutil.copy(self.zookeeper_config_path, conf_d_dir) + if self.with_kerberized_kafka: + shutil.copytree(self.kerberos_secrets_dir, p.abspath(p.join(self.path, 'secrets'))) + # Copy config.d configs print "Copy custom test config files {} to {}".format(self.custom_main_config_paths, self.config_d_dir) for path in self.custom_main_config_paths: @@ -1227,6 +1256,9 @@ class ClickHouseInstance: depends_on.append("kafka1") depends_on.append("schema-registry") + if self.with_kerberized_kafka: + depends_on.append("kerberized_kafka1") + if self.with_rabbitmq: depends_on.append("rabbitmq1") @@ -1290,6 +1322,8 @@ class ClickHouseInstance: user=os.getuid(), env_file=env_file, odbc_ini_path=odbc_ini_path, + keytab_path=self.keytab_path, + krb5_conf=self.krb5_conf, entrypoint_cmd=entrypoint_cmd, networks=networks, app_net=app_net, diff --git a/tests/integration/runner b/tests/integration/runner index f097a42e52a..78d93af2929 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -155,7 +155,9 @@ if __name__ == "__main__": elif image == "yandex/clickhouse-postgresql-java-client": env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) elif image == "yandex/clickhouse-integration-test": - env_tags += "-e {}={}".format("DOCKER_BASE_TAG", tag) + 
env_tags += "-e {}={} ".format("DOCKER_BASE_TAG", tag) + elif image == "yandex/clickhouse-kerberos-kdc": + env_tags += "-e {}={}".format("DOCKER_KERBEROS_KDC_TAG", tag) else: logging.info("Unknown image {}".format(image)) diff --git a/tests/integration/test_compression_nested_columns/__init__.py b/tests/integration/test_compression_nested_columns/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_compression_nested_columns/test.py b/tests/integration/test_compression_nested_columns/test.py new file mode 100644 index 00000000000..f73adadd770 --- /dev/null +++ b/tests/integration/test_compression_nested_columns/test.py @@ -0,0 +1,68 @@ +import random +import string + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', with_zookeeper=True) +node2 = cluster.add_instance('node2', with_zookeeper=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + +def get_compression_codec_byte(node, table_name, part_name, filename): + cmd = "tail -c +17 /var/lib/clickhouse/data/default/{}/{}/{}.bin | od -x -N 1 | head -n 1 | awk '{{print $2}}'".format( + table_name, part_name, filename) + return node.exec_in_container(["bash", "-c", cmd]).strip() + +CODECS_MAPPING = { + 'NONE' : '0002', + 'LZ4': '0082', + 'LZ4HC': '0082', # not an error, same byte + 'ZSTD': '0090', + 'Multiple': '0091', + 'Delta': '0092', + 'T64': '0093', +} + +def test_nested_compression_codec(start_cluster): + for i, node in enumerate([node1, node2]): + node.query(""" + CREATE TABLE compression_table ( + key UInt64, + column_ok Nullable(UInt64) CODEC(Delta, LZ4), + column_array Array(Array(UInt64)) CODEC(T64, LZ4), + column_bad LowCardinality(Int64) CODEC(Delta) + ) ENGINE = ReplicatedMergeTree('/t', '{}') ORDER BY tuple() PARTITION BY key + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; + """.format(i), settings={"allow_suspicious_codecs" : "1", "allow_suspicious_low_cardinality_types" : "1"}) + + node1.query("INSERT INTO compression_table VALUES (1, 1, [[77]], 32)") + + node2.query("SYSTEM SYNC REPLICA compression_table", timeout=5) + + node1.query("DETACH TABLE compression_table") + node2.query("DETACH TABLE compression_table") + + node1.query("ATTACH TABLE compression_table") + node2.query("ATTACH TABLE compression_table") + + for node in [node1, node2]: + assert get_compression_codec_byte(node, "compression_table", "1_0_0_0", "column_ok") == CODECS_MAPPING['Multiple'] + assert get_compression_codec_byte(node, "compression_table", "1_0_0_0", "column_ok.null") == CODECS_MAPPING['LZ4'] + + assert get_compression_codec_byte(node1, "compression_table", "1_0_0_0", "column_array") == CODECS_MAPPING['Multiple'] + assert get_compression_codec_byte(node2, "compression_table", "1_0_0_0", "column_array.size0") == CODECS_MAPPING['LZ4'] + assert get_compression_codec_byte(node2, "compression_table", "1_0_0_0", "column_array.size1") == CODECS_MAPPING['LZ4'] + + assert get_compression_codec_byte(node2, "compression_table", "1_0_0_0", "column_bad.dict") == CODECS_MAPPING['Delta'] + assert get_compression_codec_byte(node1, "compression_table", "1_0_0_0", "column_bad") == CODECS_MAPPING['NONE'] diff --git a/tests/integration/test_distributed_ddl/test.py b/tests/integration/test_distributed_ddl/test.py index b788dafe167..9f01fa7ed5b 100755 --- a/tests/integration/test_distributed_ddl/test.py +++ 
b/tests/integration/test_distributed_ddl/test.py @@ -330,10 +330,12 @@ def test_replicated_without_arguments(test_cluster): assert "are supported only for ON CLUSTER queries with Atomic database engine" in \ instance.query_and_get_error("CREATE TABLE test_atomic.rmt (n UInt64, s String) ENGINE=ReplicatedMergeTree ORDER BY n") test_cluster.ddl_check_query(instance, - "CREATE TABLE test_atomic.rmt ON CLUSTER cluster (n UInt64, s String) ENGINE=ReplicatedMergeTree ORDER BY n") + "CREATE TABLE test_atomic.rmt ON CLUSTER cluster (n UInt64, s String) ENGINE=ReplicatedMergeTree() ORDER BY n") test_cluster.ddl_check_query(instance, "DROP TABLE test_atomic.rmt ON CLUSTER cluster") test_cluster.ddl_check_query(instance, - "CREATE TABLE test_atomic.rmt ON CLUSTER cluster (n UInt64, s String) ENGINE=ReplicatedMergeTree ORDER BY n") + "CREATE TABLE test_atomic.rmt UUID '12345678-0000-4000-8000-000000000001' ON CLUSTER cluster (n UInt64, s String) ENGINE=ReplicatedMergeTree ORDER BY n") + assert instance.query("SHOW CREATE test_atomic.rmt FORMAT TSVRaw") == \ + "CREATE TABLE test_atomic.rmt\n(\n `n` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree('/clickhouse/tables/12345678-0000-4000-8000-000000000001/{shard}', '{replica}')\nORDER BY n\nSETTINGS index_granularity = 8192\n" test_cluster.ddl_check_query(instance, "RENAME TABLE test_atomic.rmt TO test_atomic.rmt_renamed ON CLUSTER cluster") test_cluster.ddl_check_query(instance, "CREATE TABLE test_atomic.rmt ON CLUSTER cluster (n UInt64, s String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') ORDER BY n") @@ -349,6 +351,8 @@ def test_replicated_without_arguments(test_cluster): assert "are supported only for ON CLUSTER queries with Atomic database engine" in \ instance.query_and_get_error("CREATE TABLE test_ordinary.rmt ON CLUSTER cluster (n UInt64, s String) ENGINE=ReplicatedMergeTree('/{shard}/{uuid}/', '{replica}') ORDER BY n") test_cluster.ddl_check_query(instance, "CREATE TABLE test_ordinary.rmt ON CLUSTER cluster (n UInt64, s String) ENGINE=ReplicatedMergeTree('/{shard}/{table}/', '{replica}') ORDER BY n") + assert instance.query("SHOW CREATE test_ordinary.rmt FORMAT TSVRaw") == \ + "CREATE TABLE test_ordinary.rmt\n(\n `n` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree('/{shard}/rmt/', '{replica}')\nORDER BY n\nSETTINGS index_granularity = 8192\n" test_cluster.ddl_check_query(instance, "DROP DATABASE test_ordinary ON CLUSTER cluster") test_cluster.pm_random_drops.push_rules(rules) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 7acb8c5fe00..45b3c3c65f0 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -75,7 +75,6 @@ def drop_table(cluster): minio = cluster.minio_client node.query("DROP TABLE IF EXISTS s3_test NO DELAY") - time.sleep(1) try: assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0 finally: @@ -317,7 +316,6 @@ def test_move_replace_partition_to_another_table(cluster): minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 node.query("DROP TABLE s3_clone NO DELAY") - time.sleep(1) assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" # Data should remain in S3 @@ -330,7 +328,6 @@ def test_move_replace_partition_to_another_table(cluster): list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 
FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 node.query("DROP TABLE s3_test NO DELAY") - time.sleep(1) # Backup data should remain in S3. assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD_PER_PART_WIDE * 4 diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 399b9263123..a8824b383ab 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -124,6 +124,9 @@ def test_clickhouse_dml_for_mysql_database(started_cluster): clickhouse_node.query("INSERT INTO `test_database`.`test_table`(`i``d`) select number from numbers(10000)") assert clickhouse_node.query("SELECT count() FROM `test_database`.`test_table`").rstrip() == '10000' + clickhouse_node.query("DROP DATABASE test_database") + assert 'test_database' not in clickhouse_node.query('SHOW DATABASES') + mysql_node.query("DROP DATABASE test_database") @@ -160,6 +163,36 @@ def test_bad_arguments_for_mysql_database_engine(started_cluster): mysql_node.query("DROP DATABASE test_bad_arguments") +def test_data_types_support_level_for_mysql_database_engine(started_cluster): + with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', port=3308)) as mysql_node: + mysql_node.query("CREATE DATABASE IF NOT EXISTS test DEFAULT CHARACTER SET 'utf8'") + clickhouse_node.query("CREATE DATABASE test_database ENGINE = MySQL('mysql1:3306', test, 'root', 'clickhouse')", + settings={"mysql_datatypes_support_level": "decimal,datetime64"}) + + assert "SETTINGS mysql_datatypes_support_level = \\'decimal,datetime64\\'" in clickhouse_node.query("SHOW CREATE DATABASE test_database FORMAT TSV") + clickhouse_node.query("DETACH DATABASE test_database") + + # without context settings + clickhouse_node.query("ATTACH DATABASE test_database") + assert "SETTINGS mysql_datatypes_support_level = \\'decimal,datetime64\\'" in clickhouse_node.query("SHOW CREATE DATABASE test_database FORMAT TSV") + + clickhouse_node.query( + "CREATE DATABASE test_database_1 ENGINE = MySQL('mysql1:3306', test, 'root', 'clickhouse') SETTINGS mysql_datatypes_support_level = 'decimal,datetime64'", + settings={"mysql_datatypes_support_level": "decimal"}) + + assert "SETTINGS mysql_datatypes_support_level = \\'decimal,datetime64\\'" in clickhouse_node.query("SHOW CREATE DATABASE test_database_1 FORMAT TSV") + clickhouse_node.query("DETACH DATABASE test_database_1") + + # without context settings + clickhouse_node.query("ATTACH DATABASE test_database_1") + assert "SETTINGS mysql_datatypes_support_level = \\'decimal,datetime64\\'" in clickhouse_node.query("SHOW CREATE DATABASE test_database_1 FORMAT TSV") + + clickhouse_node.query("DROP DATABASE test_database") + clickhouse_node.query("DROP DATABASE test_database_1") + assert 'test_database' not in clickhouse_node.query('SHOW DATABASES') + mysql_node.query("DROP DATABASE test") + + decimal_values = [0.123, 0.4, 5.67, 8.91011, 123456789.123, -0.123, -0.4, -5.67, -8.91011, -123456789.123] timestamp_values = ['2015-05-18 07:40:01.123', '2019-09-16 19:20:11.123'] timestamp_values_no_subsecond = ['2015-05-18 07:40:01', '2019-09-16 19:20:11'] diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index b5fd312007a..3e737dc2644 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -238,7 +238,9 @@ def test_mysql_federated(mysql_server, server_address): 
node.query('''INSERT INTO mysql_federated.test VALUES (0), (1), (5)''', settings={"password": "123"}) def check_retryable_error_in_stderr(stderr): - return "Can't connect to local MySQL server through socket" in stderr or "MySQL server has gone away" in stderr + return ("Can't connect to local MySQL server through socket" in stderr + or "MySQL server has gone away" in stderr + or "Server shutdown in progress" in stderr) code, (stdout, stderr) = mysql_server.exec_run(''' mysql diff --git a/tests/integration/test_rename_column/test.py b/tests/integration/test_rename_column/test.py index 51921c3385c..b7ab8a75ba5 100644 --- a/tests/integration/test_rename_column/test.py +++ b/tests/integration/test_rename_column/test.py @@ -37,7 +37,6 @@ def started_cluster(): def drop_table(nodes, table_name): for node in nodes: node.query("DROP TABLE IF EXISTS {} NO DELAY".format(table_name)) - time.sleep(1) def create_table(nodes, table_name, with_storage_policy=False, with_time_column=False, diff --git a/tests/integration/test_s3_with_proxy/test.py b/tests/integration/test_s3_with_proxy/test.py index 9df209826f9..70a50ae0e15 100644 --- a/tests/integration/test_s3_with_proxy/test.py +++ b/tests/integration/test_s3_with_proxy/test.py @@ -1,5 +1,6 @@ import logging import os +import time import pytest from helpers.cluster import ClickHouseCluster @@ -37,10 +38,15 @@ def cluster(): def check_proxy_logs(cluster, proxy_instance, http_methods={"POST", "PUT", "GET", "DELETE"}): - logs = cluster.get_container_logs(proxy_instance) - # Check that all possible interactions with Minio are present - for http_method in http_methods: - assert logs.find(http_method + " http://minio1") >= 0 + for i in range(10): + logs = cluster.get_container_logs(proxy_instance) + # Check with retry that all possible interactions with Minio are present + for http_method in http_methods: + if logs.find(http_method + " http://minio1") >= 0: + return + time.sleep(1) + else: + assert False, "http method not found in logs" @pytest.mark.parametrize( diff --git a/tests/integration/test_storage_kerberized_kafka/__init__.py b/tests/integration/test_storage_kerberized_kafka/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_kerberized_kafka/clickhouse_path/EMPTY_DIR b/tests/integration/test_storage_kerberized_kafka/clickhouse_path/EMPTY_DIR new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml b/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml new file mode 100644 index 00000000000..0302bd78e3f --- /dev/null +++ b/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml @@ -0,0 +1,26 @@ + + + earliest + + SASL_PLAINTEXT + GSSAPI + kafka + /tmp/keytab/clickhouse.keytab + kafkauser/instance@TEST.CLICKHOUSE.TECH + security + false + + + + + 300 + + 6000 + + diff --git a/tests/integration/test_storage_kerberized_kafka/configs/log_conf.xml b/tests/integration/test_storage_kerberized_kafka/configs/log_conf.xml new file mode 100644 index 00000000000..95466269afe --- /dev/null +++ b/tests/integration/test_storage_kerberized_kafka/configs/log_conf.xml @@ -0,0 +1,11 @@ + + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + \ No newline at end of file diff --git a/tests/integration/test_storage_kerberized_kafka/kerberos_image_config.sh 
b/tests/integration/test_storage_kerberized_kafka/kerberos_image_config.sh new file mode 100644 index 00000000000..dda10d47d94 --- /dev/null +++ b/tests/integration/test_storage_kerberized_kafka/kerberos_image_config.sh @@ -0,0 +1,132 @@ +#!/bin/bash + + +set -x # trace + +: "${REALM:=TEST.CLICKHOUSE.TECH}" +: "${DOMAIN_REALM:=test.clickhouse.tech}" +: "${KERB_MASTER_KEY:=masterkey}" +: "${KERB_ADMIN_USER:=admin}" +: "${KERB_ADMIN_PASS:=admin}" + +create_config() { + : "${KDC_ADDRESS:=$(hostname -f)}" + + cat>/etc/krb5.conf</var/kerberos/krb5kdc/kdc.conf< /var/kerberos/krb5kdc/kadm5.acl +} + +create_keytabs() { + + kadmin.local -q "addprinc -randkey zookeeper/kafka_kerberized_zookeeper@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/kafka_kerberized_zookeeper.keytab zookeeper/kafka_kerberized_zookeeper@${REALM}" + + kadmin.local -q "addprinc -randkey kafka/kerberized_kafka1@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/kerberized_kafka.keytab kafka/kerberized_kafka1@${REALM}" + + kadmin.local -q "addprinc -randkey zkclient@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/zkclient.keytab zkclient@${REALM}" + + + kadmin.local -q "addprinc -randkey kafkauser/instance@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab kafkauser/instance@${REALM}" + + chmod g+r /tmp/keytab/clickhouse.keytab + +} + +main() { + + if [ ! -f /kerberos_initialized ]; then + create_config + create_db + create_admin_user + start_kdc + + touch /kerberos_initialized + fi + + if [ ! -f /var/kerberos/krb5kdc/principal ]; then + while true; do sleep 1000; done + else + start_kdc + create_keytabs + tail -F /var/log/kerberos/krb5kdc.log + fi + +} + +[[ "$0" == "${BASH_SOURCE[0]}" ]] && main "$@" diff --git a/tests/integration/test_storage_kerberized_kafka/secrets/broker_jaas.conf b/tests/integration/test_storage_kerberized_kafka/secrets/broker_jaas.conf new file mode 100644 index 00000000000..8a55ec2faa0 --- /dev/null +++ b/tests/integration/test_storage_kerberized_kafka/secrets/broker_jaas.conf @@ -0,0 +1,14 @@ +KafkaServer { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + keyTab="/etc/kafka/secrets/kerberized_kafka.keytab" + principal="kafka/kerberized_kafka1@TEST.CLICKHOUSE.TECH"; +}; +Client { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + keyTab="/etc/kafka/secrets/zkclient.keytab" + principal="zkclient@TEST.CLICKHOUSE.TECH"; +}; diff --git a/tests/integration/test_storage_kerberized_kafka/secrets/krb.conf b/tests/integration/test_storage_kerberized_kafka/secrets/krb.conf new file mode 100644 index 00000000000..1efdf510f22 --- /dev/null +++ b/tests/integration/test_storage_kerberized_kafka/secrets/krb.conf @@ -0,0 +1,22 @@ +[logging] + default = FILE:/var/log/kerberos/krb5libs.log + kdc = FILE:/var/log/kerberos/krb5kdc.log + admin_server = FILE:/var/log/kerberos/kadmind.log + +[libdefaults] + default_realm = TEST.CLICKHOUSE.TECH + dns_lookup_realm = false + dns_lookup_kdc = false + ticket_lifetime = 15s + renew_lifetime = 15s + forwardable = true + +[realms] + TEST.CLICKHOUSE.TECH = { + kdc = kafka_kerberos + admin_server = kafka_kerberos + } + +[domain_realm] + .TEST.CLICKHOUSE.TECH = TEST.CLICKHOUSE.TECH + TEST.CLICKHOUSE.TECH = TEST.CLICKHOUSE.TECH diff --git a/tests/integration/test_storage_kerberized_kafka/secrets/zookeeper_jaas.conf b/tests/integration/test_storage_kerberized_kafka/secrets/zookeeper_jaas.conf new file mode 100644 index 00000000000..1b1f8103f42 
--- /dev/null +++ b/tests/integration/test_storage_kerberized_kafka/secrets/zookeeper_jaas.conf @@ -0,0 +1,14 @@ +Server { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + keyTab="/etc/kafka/secrets/kafka_kerberized_zookeeper.keytab" + principal="zookeeper/kafka_kerberized_zookeeper@TEST.CLICKHOUSE.TECH"; +}; +Client { + com.sun.security.auth.module.Krb5LoginModule required + useKeyTab=true + storeKey=true + keyTab="/etc/kafka/secrets/zkclient.keytab" + principal="zkclient@TEST.CLICKHOUSE.TECH"; +}; diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py new file mode 100644 index 00000000000..ec23d340977 --- /dev/null +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -0,0 +1,146 @@ +import os.path as p +import random +import threading +import time +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.client import QueryRuntimeException +from helpers.network import PartitionManager + +import json +import subprocess +import kafka.errors +from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer, BrokerConnection +from kafka.admin import NewTopic +from kafka.protocol.admin import DescribeGroupsResponse_v1, DescribeGroupsRequest_v1 +from kafka.protocol.group import MemberAssignment +import socket + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', + main_configs=['configs/kafka.xml', 'configs/log_conf.xml' ], + with_kerberized_kafka=True, + clickhouse_path_dir="clickhouse_path" + ) +kafka_id = '' # instance.cluster.kafka_docker_id + +# Helpers + +def check_kafka_is_available(): + + # plaintext + p = subprocess.Popen(('docker', + 'exec', + '-i', + kafka_id, + '/usr/bin/kafka-broker-api-versions', + '--bootstrap-server', + 'localhost:9093'), + stdout=subprocess.PIPE) + p.communicate() + return p.returncode == 0 + + +def wait_kafka_is_available(max_retries=50): + retries = 0 + while True: + if check_kafka_is_available(): + break + else: + retries += 1 + if retries > max_retries: + raise "Kafka is not available" + print("Waiting for Kafka to start up") + time.sleep(1) + + +def kafka_produce(topic, messages, timestamp=None): + producer = KafkaProducer(bootstrap_servers="localhost:9093") + for message in messages: + producer.send(topic=topic, value=message, timestamp_ms=timestamp) + producer.flush() + print ("Produced {} messages for topic {}".format(len(messages), topic)) + + + +# Fixtures + +@pytest.fixture(scope="module") +def kafka_cluster(): + try: + global kafka_id + cluster.start() + kafka_id = instance.cluster.kerberized_kafka_docker_id + print("kafka_id is {}".format(kafka_id)) + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def kafka_setup_teardown(): + instance.query('DROP DATABASE IF EXISTS test; CREATE DATABASE test;') + wait_kafka_is_available() + print("kafka is available - running test") + yield # run test + +# Tests + +@pytest.mark.timeout(180) # wait to build containers +def test_kafka_json_as_string(kafka_cluster): + kafka_produce('kafka_json_as_string', ['{"t": 123, "e": {"x": "woof"} }', '', '{"t": 124, "e": {"x": "test"} }', '{"F1":"V1","F2":{"F21":"V21","F22":{},"F23":"V23","F24":"2019-12-24T16:28:04"},"F3":"V3"}']) + + instance.query(''' + CREATE TABLE test.kafka (field String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kerberized_kafka1:19092', + kafka_topic_list = 'kafka_json_as_string', + 
kafka_group_name = 'kafka_json_as_string', + kafka_format = 'JSONAsString', + kafka_flush_interval_ms=1000; + ''') + + result = instance.query('SELECT * FROM test.kafka;') + expected = '''\ +{"t": 123, "e": {"x": "woof"} } +{"t": 124, "e": {"x": "test"} } +{"F1":"V1","F2":{"F21":"V21","F22":{},"F23":"V23","F24":"2019-12-24T16:28:04"},"F3":"V3"} +''' + assert TSV(result) == TSV(expected) + assert instance.contains_in_log("Parsing of message (topic: kafka_json_as_string, partition: 0, offset: 1) return no rows") + +def test_kafka_json_as_string_no_kdc(kafka_cluster): + kafka_produce('kafka_json_as_string_no_kdc', ['{"t": 123, "e": {"x": "woof"} }', '', '{"t": 124, "e": {"x": "test"} }', '{"F1":"V1","F2":{"F21":"V21","F22":{},"F23":"V23","F24":"2019-12-24T16:28:04"},"F3":"V3"}']) + + kafka_cluster.pause_container('kafka_kerberos') + time.sleep(45) # wait for ticket expiration + + instance.query(''' + CREATE TABLE test.kafka_no_kdc (field String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kerberized_kafka1:19092', + kafka_topic_list = 'kafka_json_as_string_no_kdc', + kafka_group_name = 'kafka_json_as_string_no_kdc', + kafka_format = 'JSONAsString', + kafka_flush_interval_ms=1000; + ''') + + result = instance.query('SELECT * FROM test.kafka_no_kdc;') + expected = '' + + kafka_cluster.unpause_container('kafka_kerberos') + + + assert TSV(result) == TSV(expected) + assert instance.contains_in_log("StorageKafka (kafka_no_kdc): Nothing to commit") + assert instance.contains_in_log("Ticket expired") + assert instance.contains_in_log("Kerberos ticket refresh failed") + + +if __name__ == '__main__': + cluster.start() + raw_input("Cluster created, press any key to destroy...") + cluster.shutdown() diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index 0b73866eaee..83ef1e6c86a 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -33,6 +33,25 @@ def started_cluster(): cluster.shutdown() +def test_many_connections(started_cluster): + table_name = 'test_many_connections' + conn = get_mysql_conn() + create_mysql_table(conn, table_name) + + node1.query(''' +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql1:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +'''.format(table_name, table_name)) + + node1.query("INSERT INTO {} (id, name) SELECT number, concat('name_', toString(number)) from numbers(10) ".format(table_name)) + + query = "SELECT count() FROM (" + for i in range (24): + query += "SELECT id FROM {t} UNION ALL " + query += "SELECT id FROM {t})" + + assert node1.query(query.format(t=table_name)) == '250\n' + conn.close() + def test_insert_select(started_cluster): table_name = 'test_insert_select' conn = get_mysql_conn() diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index e39296525d0..f752e72c677 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1,8 +1,11 @@ +import gzip import json import logging import os import random +import StringIO import threading +import time import helpers.client import pytest @@ -57,6 +60,11 @@ def prepare_s3_bucket(cluster): minio_client.make_bucket(cluster.minio_restricted_bucket) +def put_s3_file_content(cluster, bucket, filename, data): + buf = StringIO.StringIO(data) + cluster.minio_client.put_object(bucket, filename, buf, len(data)) + + # Returns content of given S3 file as string. 
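# When the object was uploaded gzip-compressed (as in test_storage_s3_put_gzip
# below), the returned string is the raw compressed payload and the caller is
# expected to wrap it in gzip.GzipFile to decompress it.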
def get_s3_file_content(cluster, bucket, filename): # type: (ClickHouseCluster, str) -> str @@ -344,3 +352,116 @@ def test_infinite_redirect(cluster): exception_raised = True finally: assert exception_raised + + +def test_storage_s3_get_gzip(cluster): + bucket = cluster.minio_bucket + instance = cluster.instances["dummy"] + filename = "test_get_gzip.bin" + name = "test_get_gzip" + data = [ + "Sophia Intrieri,55", + "Jack Taylor,71", + "Christopher Silva,66", + "Clifton Purser,35", + "Richard Aceuedo,43", + "Lisa Hensley,31", + "Alice Wehrley,1", + "Mary Farmer,47", + "Samara Ramirez,19", + "Shirley Lloyd,51", + "Santos Cowger,0", + "Richard Mundt,88", + "Jerry Gonzalez,15", + "Angela James,10", + "Norman Ortega,33", + "" + ] + buf = StringIO.StringIO() + compressed = gzip.GzipFile(fileobj=buf, mode="wb") + compressed.write("\n".join(data)) + compressed.close() + put_s3_file_content(cluster, bucket, filename, buf.getvalue()) + + try: + run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV', 'gzip')".format( + name, cluster.minio_host, cluster.minio_port, bucket, filename)) + + run_query(instance, "SELECT sum(id) FROM {}".format(name)).splitlines() == ["565"] + + finally: + run_query(instance, "DROP TABLE {}".format(name)) + + +def test_storage_s3_put_uncompressed(cluster): + bucket = cluster.minio_bucket + instance = cluster.instances["dummy"] + filename = "test_put_uncompressed.bin" + name = "test_put_uncompressed" + data = [ + "'Gloria Thompson',99", + "'Matthew Tang',98", + "'Patsy Anderson',23", + "'Nancy Badillo',93", + "'Roy Hunt',5", + "'Adam Kirk',51", + "'Joshua Douds',28", + "'Jolene Ryan',0", + "'Roxanne Padilla',50", + "'Howard Roberts',41", + "'Ricardo Broughton',13", + "'Roland Speer',83", + "'Cathy Cohan',58", + "'Kathie Dawson',100", + "'Gregg Mcquistion',11", + ] + try: + run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV')".format( + name, cluster.minio_host, cluster.minio_port, bucket, filename)) + + run_query(instance, "INSERT INTO {} VALUES ({})".format(name, "),(".join(data))) + + run_query(instance, "SELECT sum(id) FROM {}".format(name)).splitlines() == ["753"] + + uncompressed_content = get_s3_file_content(cluster, bucket, filename) + assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 753 + finally: + run_query(instance, "DROP TABLE {}".format(name)) + + +def test_storage_s3_put_gzip(cluster): + bucket = cluster.minio_bucket + instance = cluster.instances["dummy"] + filename = "test_put_gzip.bin" + name = "test_put_gzip" + data = [ + "'Joseph Tomlinson',5", + "'Earnest Essary',44", + "'Matha Pannell',24", + "'Michael Shavers',46", + "'Elias Groce',38", + "'Pamela Bramlet',50", + "'Lewis Harrell',49", + "'Tamara Fyall',58", + "'George Dixon',38", + "'Alice Walls',49", + "'Paula Mais',24", + "'Myrtle Pelt',93", + "'Sylvia Naffziger',18", + "'Amanda Cave',83", + "'Yolanda Joseph',89" + ] + try: + run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV', 'gzip')".format( + name, cluster.minio_host, cluster.minio_port, bucket, filename)) + + run_query(instance, "INSERT INTO {} VALUES ({})".format(name, "),(".join(data))) + + run_query(instance, "SELECT sum(id) FROM {}".format(name)).splitlines() == ["708"] + + buf = StringIO.StringIO(get_s3_file_content(cluster, bucket, filename)) + f = gzip.GzipFile(fileobj=buf, mode="rb") + uncompressed_content = f.read() + assert sum([ int(i.split(',')[1]) for i in 
uncompressed_content.splitlines() ]) == 708 + finally: + run_query(instance, "DROP TABLE {}".format(name)) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 878db2da11f..13fb779a6e6 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -27,7 +27,6 @@ def started_cluster(): def drop_table(nodes, table_name): for node in nodes: node.query("DROP TABLE IF EXISTS {} NO DELAY".format(table_name)) - time.sleep(1) # Column TTL works only with wide parts, because it's very expensive to apply it for compact parts def test_ttl_columns(started_cluster): diff --git a/tests/performance/bigint_arithm.xml b/tests/performance/bigint_arithm.xml new file mode 100644 index 00000000000..4d81f2e2c2d --- /dev/null +++ b/tests/performance/bigint_arithm.xml @@ -0,0 +1,15 @@ + + + 10G + + + SELECT toInt128(number) + number FROM numbers_mt(1000000000) FORMAT Null + SELECT toInt128(number) - number FROM numbers_mt(1000000000) FORMAT Null + SELECT toInt128(number) * number FROM numbers_mt(1000000000) FORMAT Null + SELECT toInt128(number) / number FROM numbers_mt(1000000000) FORMAT Null + + SELECT toInt256(number) + number FROM numbers_mt(100000000) FORMAT Null + SELECT toInt256(number) - number FROM numbers_mt(100000000) FORMAT Null + SELECT toInt256(number) * number FROM numbers_mt(100000000) FORMAT Null + SELECT intDiv(toInt256(number + 1), number + 1) FROM numbers_mt(100000000) FORMAT Null + diff --git a/tests/queries/0_stateless/00396_uuid.reference b/tests/queries/0_stateless/00396_uuid.reference index fe92b3684a6..d70322ec4c1 100644 --- a/tests/queries/0_stateless/00396_uuid.reference +++ b/tests/queries/0_stateless/00396_uuid.reference @@ -5,3 +5,4 @@ 01234567-89ab-cdef-0123-456789abcdef 01234567-89ab-cdef-0123-456789abcdef 01234567-89ab-cdef-0123-456789abcdef 01234567-89ab-cdef-0123-456789abcdef 01234567-89ab-cdef-0123-456789abcdef 01234567-89ab-cdef-0123-456789abcdef 3f1ed72e-f7fe-4459-9cbe-95fe9298f845 +1 diff --git a/tests/queries/0_stateless/00396_uuid.sql b/tests/queries/0_stateless/00396_uuid.sql index d671ce844e2..9d8b48bddb0 100644 --- a/tests/queries/0_stateless/00396_uuid.sql +++ b/tests/queries/0_stateless/00396_uuid.sql @@ -5,3 +5,9 @@ SELECT hex(UUIDStringToNum(materialize('01234567-89ab-cdef-0123-456789abcdef'))) SELECT '01234567-89ab-cdef-0123-456789abcdef' AS str, UUIDNumToString(UUIDStringToNum(str)), UUIDNumToString(UUIDStringToNum(toFixedString(str, 36))); SELECT materialize('01234567-89ab-cdef-0123-456789abcdef') AS str, UUIDNumToString(UUIDStringToNum(str)), UUIDNumToString(UUIDStringToNum(toFixedString(str, 36))); SELECT toString(toUUID('3f1ed72e-f7fe-4459-9cbe-95fe9298f845')); + +-- conversion back and forth to big-endian hex string +with generateUUIDv4() as uuid, + identity(lower(hex(reverse(reinterpretAsString(uuid))))) as str, + reinterpretAsUUID(reverse(unhex(str))) as uuid2 +select uuid = uuid2; diff --git a/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh b/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh index af2bf2dca2b..380f6ee6ff8 100755 --- a/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh +++ b/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh @@ -12,7 +12,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE mutations_r1(d Date, x UInt32, s Stri ${CLICKHOUSE_CLIENT} --query="CREATE TABLE mutations_r2(d Date, x UInt32, s String, m MATERIALIZED x + 2) ENGINE 
ReplicatedMergeTree('/clickhouse/tables/test_00652/mutations', 'r2', d, intDiv(x, 10), 8192)" # Test a mutation on empty table -${CLICKHOUSE_CLIENT} --query="ALTER TABLE mutations_r1 DELETE WHERE x = 1" +${CLICKHOUSE_CLIENT} --query="ALTER TABLE mutations_r1 DELETE WHERE x = 1 SETTINGS mutations_sync = 2" # Insert some data ${CLICKHOUSE_CLIENT} --query="INSERT INTO mutations_r1(d, x, s) VALUES \ @@ -34,6 +34,8 @@ ${CLICKHOUSE_CLIENT} --query="ALTER TABLE mutations_r1 DELETE WHERE m = 3 SETTIN ${CLICKHOUSE_CLIENT} --query="INSERT INTO mutations_r1(d, x, s) VALUES \ ('2000-01-01', 5, 'e'), ('2000-02-01', 5, 'e')" +${CLICKHOUSE_CLIENT} --query "SYSTEM SYNC REPLICA mutations_r2" + # Check that the table contains only the data that should not be deleted. ${CLICKHOUSE_CLIENT} --query="SELECT d, x, s, m FROM mutations_r2 ORDER BY d, x" # Check the contents of the system.mutations table. diff --git a/tests/queries/0_stateless/00847_multiple_join_same_column.reference b/tests/queries/0_stateless/00847_multiple_join_same_column.reference index 4e813d5a677..91bd62ca5a3 100644 --- a/tests/queries/0_stateless/00847_multiple_join_same_column.reference +++ b/tests/queries/0_stateless/00847_multiple_join_same_column.reference @@ -31,18 +31,6 @@ y.b: 0 │ 1 │ 1 │ 1 │ │ 2 │ 2 │ 2 │ └─────┴─────┴─────┘ -┌─s.a─┬─s.a─┬─s_b─┬─s_b─┐ -│ 1 │ 1 │ 1 │ 1 │ -│ 0 │ 0 │ 0 │ 0 │ -└─────┴─────┴─────┴─────┘ -┌─y.a─┬─y.a─┬─y_b─┬─y_b─┐ -│ 1 │ 1 │ 1 │ 1 │ -│ 0 │ 0 │ 0 │ 0 │ -└─────┴─────┴─────┴─────┘ -┌─t_a─┬─t_a─┬─s_a─┬─s_a─┬─y_a─┬─y_a─┐ -│ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ -│ 2 │ 2 │ 0 │ 0 │ 0 │ 0 │ -└─────┴─────┴─────┴─────┴─────┴─────┘ ┌─s.a─┬─s.a─┬─s_b─┬─s.b─┐ │ 1 │ 1 │ 1 │ 1 │ │ 0 │ 0 │ 0 │ 0 │ diff --git a/tests/queries/0_stateless/00847_multiple_join_same_column.sql b/tests/queries/0_stateless/00847_multiple_join_same_column.sql index eae18dba7f3..c7f0c6383c2 100644 --- a/tests/queries/0_stateless/00847_multiple_join_same_column.sql +++ b/tests/queries/0_stateless/00847_multiple_join_same_column.sql @@ -39,28 +39,6 @@ left join y on y.b = s.b order by t.a format PrettyCompactNoEscapes; -set multiple_joins_rewriter_version = 1; - -select s.a, s.a, s.b as s_b, s.b from t -left join s on s.a = t.a -left join y on s.b = y.b -order by t.a -format PrettyCompactNoEscapes; - -select y.a, y.a, y.b as y_b, y.b from t -left join s on s.a = t.a -left join y on y.b = s.b -order by t.a -format PrettyCompactNoEscapes; - -select t.a, t.a as t_a, s.a, s.a as s_a, y.a, y.a as y_a from t -left join s on t.a = s.a -left join y on y.b = s.b -order by t.a -format PrettyCompactNoEscapes; - -set multiple_joins_rewriter_version = 2; - select s.a, s.a, s.b as s_b, s.b from t left join s on s.a = t.a left join y on s.b = y.b diff --git a/tests/queries/0_stateless/00849_multiple_comma_join.reference b/tests/queries/0_stateless/00849_multiple_comma_join.reference deleted file mode 100644 index f4db2238dd1..00000000000 --- a/tests/queries/0_stateless/00849_multiple_comma_join.reference +++ /dev/null @@ -1,292 +0,0 @@ -SELECT a -FROM t1_00849 -CROSS JOIN t2_00849 -SELECT a -FROM t1_00849 -ALL INNER JOIN t2_00849 ON a = t2_00849.a -WHERE a = t2_00849.a -SELECT a -FROM t1_00849 -ALL INNER JOIN t2_00849 ON b = t2_00849.b -WHERE b = t2_00849.b -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - a AS `--t1_00849.a`, - b, - t2_00849.a AS `--t2_00849.a`, - t2_00849.b - FROM t1_00849 - ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a` -) AS `--.s` -ALL INNER JOIN t3_00849 ON `--t1_00849.a` = a -WHERE (`--t1_00849.a` = `--t2_00849.a`) AND 
(`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - a AS `--t1_00849.a`, - b AS `--t1_00849.b`, - t2_00849.a, - t2_00849.b AS `--t2_00849.b` - FROM t1_00849 - ALL INNER JOIN t2_00849 ON `--t1_00849.b` = `--t2_00849.b` -) AS `--.s` -ALL INNER JOIN t3_00849 ON `--t1_00849.b` = b -WHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - `--t1_00849.a`, - b, - `--t2_00849.a`, - `t2_00849.b`, - a AS `--t3_00849.a`, - t3_00849.b - FROM - ( - SELECT - a AS `--t1_00849.a`, - b, - t2_00849.a AS `--t2_00849.a`, - t2_00849.b - FROM t1_00849 - ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a` - ) AS `--.s` - ALL INNER JOIN t3_00849 ON `--t1_00849.a` = `--t3_00849.a` -) AS `--.s` -ALL INNER JOIN t4_00849 ON `--t1_00849.a` = a -WHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - `--t1_00849.a`, - `--t1_00849.b`, - `t2_00849.a`, - `--t2_00849.b`, - a, - b AS `--t3_00849.b` - FROM - ( - SELECT - a AS `--t1_00849.a`, - b AS `--t1_00849.b`, - t2_00849.a, - t2_00849.b AS `--t2_00849.b` - FROM t1_00849 - ALL INNER JOIN t2_00849 ON `--t1_00849.b` = `--t2_00849.b` - ) AS `--.s` - ALL INNER JOIN t3_00849 ON `--t1_00849.b` = `--t3_00849.b` -) AS `--.s` -ALL INNER JOIN t4_00849 ON `--t1_00849.b` = b -WHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - `--t1_00849.a`, - b, - `--t2_00849.a`, - `t2_00849.b`, - a AS `--t3_00849.a`, - t3_00849.b - FROM - ( - SELECT - a AS `--t1_00849.a`, - b, - t2_00849.a AS `--t2_00849.a`, - t2_00849.b - FROM t1_00849 - ALL INNER JOIN t2_00849 ON `--t2_00849.a` = `--t1_00849.a` - ) AS `--.s` - ALL INNER JOIN t3_00849 ON `--t2_00849.a` = `--t3_00849.a` -) AS `--.s` -ALL INNER JOIN t4_00849 ON `--t2_00849.a` = a -WHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - `--t1_00849.a`, - b, - `--t2_00849.a`, - `t2_00849.b`, - a AS `--t3_00849.a`, - t3_00849.b - FROM - ( - SELECT - a AS `--t1_00849.a`, - b, - t2_00849.a AS `--t2_00849.a`, - t2_00849.b - FROM t1_00849 - CROSS JOIN t2_00849 - ) AS `--.s` - ALL INNER JOIN t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) -) AS `--.s` -ALL INNER JOIN t4_00849 ON `--t3_00849.a` = a -WHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - `--t1_00849.a`, - b, - `--t2_00849.a`, - `t2_00849.b`, - a AS `--t3_00849.a`, - t3_00849.b - FROM - ( - SELECT - a AS `--t1_00849.a`, - b, - t2_00849.a AS `--t2_00849.a`, - t2_00849.b - FROM t1_00849 - CROSS JOIN t2_00849 - ) AS `--.s` - CROSS JOIN t3_00849 -) AS `--.s` -ALL INNER JOIN t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) -WHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - `--t1_00849.a`, - b, - `--t2_00849.a`, - `t2_00849.b`, - a AS `--t3_00849.a`, - t3_00849.b - FROM - ( - SELECT - a AS `--t1_00849.a`, - b, - t2_00849.a AS `--t2_00849.a`, - t2_00849.b - FROM t1_00849 - ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a` - ) AS `--.s` - ALL INNER JOIN t3_00849 ON `--t2_00849.a` = `--t3_00849.a` -) AS `--.s` -ALL 
INNER JOIN t4_00849 ON `--t3_00849.a` = a -WHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - `--t1_00849.a`, - b, - `t2_00849.a`, - `t2_00849.b`, - a, - t3_00849.b - FROM - ( - SELECT - a AS `--t1_00849.a`, - b, - t2_00849.a, - t2_00849.b - FROM t1_00849 - CROSS JOIN t2_00849 - ) AS `--.s` - CROSS JOIN t3_00849 -) AS `--.s` -CROSS JOIN t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - `--t1_00849.a`, - b, - `t2_00849.a`, - `t2_00849.b`, - a, - t3_00849.b - FROM - ( - SELECT - a AS `--t1_00849.a`, - b, - t2_00849.a, - t2_00849.b - FROM t1_00849 - CROSS JOIN t2_00849 - ) AS `--.s` - CROSS JOIN t3_00849 -) AS `--.s` -CROSS JOIN t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - a AS `--t1_00849.a`, - b, - t2_00849.a, - t2_00849.b - FROM t1_00849 - CROSS JOIN t2_00849 -) AS `--.s` -CROSS JOIN t3_00849 -SELECT `--t1_00849.a` AS `t1_00849.a` -FROM -( - SELECT - a AS `--t1_00849.a`, - b, - t2_00849.a AS `--t2_00849.a`, - t2_00849.b - FROM t1_00849 - ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a` -) AS `--.s` -CROSS JOIN t3_00849 -SELECT * FROM t1, t2 -1 1 1 1 -1 1 1 \N -2 2 1 1 -2 2 1 \N -3 3 1 1 -3 3 1 \N -4 4 1 1 -4 4 1 \N -SELECT * FROM t1, t2 WHERE t1.a = t2.a -1 1 1 1 -1 1 1 \N -SELECT t1.a, t2.a FROM t1, t2 WHERE t1.b = t2.b -1 1 -SELECT t1.a, t2.b, t3.b FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a -1 1 1 -1 1 \N -1 \N 1 -1 \N \N -SELECT t1.a, t2.b, t3.b FROM t1, t2, t3 WHERE t1.b = t2.b AND t1.b = t3.b -1 1 1 -SELECT t1.a, t2.b, t3.b, t4.b FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t1.a = t3.a AND t1.a = t4.a -1 1 1 1 -1 1 1 \N -1 1 \N 1 -1 1 \N \N -1 \N 1 1 -1 \N 1 \N -1 \N \N 1 -1 \N \N \N -SELECT t1.a, t2.b, t3.b, t4.b FROM t1, t2, t3, t4 WHERE t1.b = t2.b AND t1.b = t3.b AND t1.b = t4.b -1 1 1 1 -SELECT t1.a, t2.b, t3.b, t4.b FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.a = t3.a AND t3.a = t4.a -1 1 1 1 -1 1 1 \N -1 1 \N 1 -1 1 \N \N -1 \N 1 1 -1 \N 1 \N -1 \N \N 1 -1 \N \N \N diff --git a/tests/queries/0_stateless/00849_multiple_comma_join.sql b/tests/queries/0_stateless/00849_multiple_comma_join.sql deleted file mode 100644 index 5a8962c2277..00000000000 --- a/tests/queries/0_stateless/00849_multiple_comma_join.sql +++ /dev/null @@ -1,69 +0,0 @@ -SET enable_debug_queries = 1; -SET enable_optimize_predicate_expression = 0; -SET multiple_joins_rewriter_version = 1; - -DROP TABLE IF EXISTS t1_00849; -DROP TABLE IF EXISTS t2_00849; -DROP TABLE IF EXISTS t3_00849; -DROP TABLE IF EXISTS t4_00849; - -CREATE TABLE t1_00849 (a UInt32, b Nullable(Int32)) ENGINE = Memory; -CREATE TABLE t2_00849 (a UInt32, b Nullable(Int32)) ENGINE = Memory; -CREATE TABLE t3_00849 (a UInt32, b Nullable(Int32)) ENGINE = Memory; -CREATE TABLE t4_00849 (a UInt32, b Nullable(Int32)) ENGINE = Memory; - -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849; -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849 WHERE t1_00849.a = t2_00849.a; -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849 WHERE t1_00849.b = t2_00849.b; -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, t3_00849 WHERE t1_00849.a = t2_00849.a AND t1_00849.a = t3_00849.a; -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, t3_00849 WHERE t1_00849.b = t2_00849.b AND t1_00849.b = t3_00849.b; -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, t3_00849, t4_00849 WHERE t1_00849.a = t2_00849.a AND t1_00849.a = t3_00849.a AND t1_00849.a = t4_00849.a; -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, 
t3_00849, t4_00849 WHERE t1_00849.b = t2_00849.b AND t1_00849.b = t3_00849.b AND t1_00849.b = t4_00849.b; - -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, t3_00849, t4_00849 WHERE t2_00849.a = t1_00849.a AND t2_00849.a = t3_00849.a AND t2_00849.a = t4_00849.a; -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, t3_00849, t4_00849 WHERE t3_00849.a = t1_00849.a AND t3_00849.a = t2_00849.a AND t3_00849.a = t4_00849.a; -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, t3_00849, t4_00849 WHERE t4_00849.a = t1_00849.a AND t4_00849.a = t2_00849.a AND t4_00849.a = t3_00849.a; -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, t3_00849, t4_00849 WHERE t1_00849.a = t2_00849.a AND t2_00849.a = t3_00849.a AND t3_00849.a = t4_00849.a; - -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, t3_00849, t4_00849; -ANALYZE SELECT t1_00849.a FROM t1_00849 CROSS JOIN t2_00849 CROSS JOIN t3_00849 CROSS JOIN t4_00849; - -ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849 CROSS JOIN t3_00849; -ANALYZE SELECT t1_00849.a FROM t1_00849 JOIN t2_00849 USING a CROSS JOIN t3_00849; -- { serverError 48 } -ANALYZE SELECT t1_00849.a FROM t1_00849 JOIN t2_00849 ON t1_00849.a = t2_00849.a CROSS JOIN t3_00849; - -INSERT INTO t1_00849 values (1,1), (2,2), (3,3), (4,4); -INSERT INTO t2_00849 values (1,1), (1, Null); -INSERT INTO t3_00849 values (1,1), (1, Null); -INSERT INTO t4_00849 values (1,1), (1, Null); - -SELECT 'SELECT * FROM t1, t2'; -SELECT * FROM t1_00849, t2_00849 -ORDER BY t1_00849.a, t2_00849.b; -SELECT 'SELECT * FROM t1, t2 WHERE t1.a = t2.a'; -SELECT * FROM t1_00849, t2_00849 WHERE t1_00849.a = t2_00849.a -ORDER BY t1_00849.a, t2_00849.b; -SELECT 'SELECT t1.a, t2.a FROM t1, t2 WHERE t1.b = t2.b'; -SELECT t1_00849.a, t2_00849.b FROM t1_00849, t2_00849 WHERE t1_00849.b = t2_00849.b; -SELECT 'SELECT t1.a, t2.b, t3.b FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a'; -SELECT t1_00849.a, t2_00849.b, t3_00849.b FROM t1_00849, t2_00849, t3_00849 -WHERE t1_00849.a = t2_00849.a AND t1_00849.a = t3_00849.a -ORDER BY t2_00849.b, t3_00849.b; -SELECT 'SELECT t1.a, t2.b, t3.b FROM t1, t2, t3 WHERE t1.b = t2.b AND t1.b = t3.b'; -SELECT t1_00849.a, t2_00849.b, t3_00849.b FROM t1_00849, t2_00849, t3_00849 WHERE t1_00849.b = t2_00849.b AND t1_00849.b = t3_00849.b; -SELECT 'SELECT t1.a, t2.b, t3.b, t4.b FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t1.a = t3.a AND t1.a = t4.a'; -SELECT t1_00849.a, t2_00849.b, t3_00849.b, t4_00849.b FROM t1_00849, t2_00849, t3_00849, t4_00849 -WHERE t1_00849.a = t2_00849.a AND t1_00849.a = t3_00849.a AND t1_00849.a = t4_00849.a -ORDER BY t2_00849.b, t3_00849.b, t4_00849.b; -SELECT 'SELECT t1.a, t2.b, t3.b, t4.b FROM t1, t2, t3, t4 WHERE t1.b = t2.b AND t1.b = t3.b AND t1.b = t4.b'; -SELECT t1_00849.a, t2_00849.b, t3_00849.b, t4_00849.b FROM t1_00849, t2_00849, t3_00849, t4_00849 -WHERE t1_00849.b = t2_00849.b AND t1_00849.b = t3_00849.b AND t1_00849.b = t4_00849.b; -SELECT 'SELECT t1.a, t2.b, t3.b, t4.b FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.a = t3.a AND t3.a = t4.a'; -SELECT t1_00849.a, t2_00849.b, t3_00849.b, t4_00849.b FROM t1_00849, t2_00849, t3_00849, t4_00849 -WHERE t1_00849.a = t2_00849.a AND t2_00849.a = t3_00849.a AND t3_00849.a = t4_00849.a -ORDER BY t2_00849.b, t3_00849.b, t4_00849.b; - -DROP TABLE t1_00849; -DROP TABLE t2_00849; -DROP TABLE t3_00849; -DROP TABLE t4_00849; diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql index de09bcc0b4d..710327ec0e4 100644 --- 
a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql @@ -1,6 +1,5 @@ SET enable_debug_queries = 1; SET enable_optimize_predicate_expression = 0; -SET multiple_joins_rewriter_version = 2; DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/00854_multiple_join_asterisks.sql b/tests/queries/0_stateless/00854_multiple_join_asterisks.sql index 43aef45440c..c01ea721000 100644 --- a/tests/queries/0_stateless/00854_multiple_join_asterisks.sql +++ b/tests/queries/0_stateless/00854_multiple_join_asterisks.sql @@ -4,14 +4,6 @@ select t1.* from system.one t1 join system.one t2 on t1.dummy = t2.dummy join sy select t2.*, t3.* from system.one t1 join system.one t2 on t1.dummy = t2.dummy join system.one t3 ON t1.dummy = t3.dummy; select t1.dummy, t2.*, t3.dummy from system.one t1 join system.one t2 on t1.dummy = t2.dummy join system.one t3 ON t1.dummy = t3.dummy; -set multiple_joins_rewriter_version = 1; - -select t1.dummy, t2.*, t3.dummy from (select * from system.one) t1 -join system.one t2 on t1.dummy = t2.dummy -join system.one t3 ON t1.dummy = t3.dummy; -- { serverError 48 } - -set multiple_joins_rewriter_version = 2; - select t1.dummy, t2.*, t3.dummy from (select * from system.one) t1 join system.one t2 on t1.dummy = t2.dummy join system.one t3 ON t1.dummy = t3.dummy; diff --git a/tests/queries/0_stateless/01116_cross_count_asterisks.reference b/tests/queries/0_stateless/01116_cross_count_asterisks.reference index 8347b144a35..5f1d0ecea5d 100644 --- a/tests/queries/0_stateless/01116_cross_count_asterisks.reference +++ b/tests/queries/0_stateless/01116_cross_count_asterisks.reference @@ -1,4 +1,2 @@ 2 1 -2 -1 diff --git a/tests/queries/0_stateless/01116_cross_count_asterisks.sql b/tests/queries/0_stateless/01116_cross_count_asterisks.sql index 1fb8b0b0e66..aa5adaddae5 100644 --- a/tests/queries/0_stateless/01116_cross_count_asterisks.sql +++ b/tests/queries/0_stateless/01116_cross_count_asterisks.sql @@ -1,20 +1,3 @@ -SET multiple_joins_rewriter_version = 2; - -SELECT count(*) -FROM numbers(2) AS n1, numbers(3) AS n2, numbers(4) AS n3 -WHERE (n1.number = n2.number) AND (n2.number = n3.number); - -SELECT count(*) c FROM ( - SELECT count(*), count(*) as c - FROM numbers(2) AS n1, numbers(3) AS n2, numbers(4) AS n3 - WHERE (n1.number = n2.number) AND (n2.number = n3.number) - AND (SELECT count(*) FROM numbers(1)) = 1 -) -WHERE (SELECT count(*) FROM numbers(2)) = 2 -HAVING c IN(SELECT count(*) c FROM numbers(1)); - -SET multiple_joins_rewriter_version = 1; - SELECT count(*) FROM numbers(2) AS n1, numbers(3) AS n2, numbers(4) AS n3 WHERE (n1.number = n2.number) AND (n2.number = n3.number); diff --git a/tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.sql b/tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.sql index c90d01ff76d..ae844888407 100644 --- a/tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.sql +++ b/tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.sql @@ -11,7 +11,6 @@ INSERT INTO t1 (id, value1) VALUES (1, 'val11'); INSERT INTO t2 (id, value2) VALUES (1, 'val21'); INSERT INTO t3 (id, value3) VALUES (1, 'val31'); -SET multiple_joins_rewriter_version = 2; SET enable_optimize_predicate_expression = 1; SELECT t1.id, t2.id as id, t3.id as value diff --git a/tests/queries/0_stateless/01144_multiple_joins_rewriter_v2_and_lambdas.sql 
b/tests/queries/0_stateless/01144_multiple_joins_rewriter_v2_and_lambdas.sql index daf93649f89..6a5ba042c63 100644 --- a/tests/queries/0_stateless/01144_multiple_joins_rewriter_v2_and_lambdas.sql +++ b/tests/queries/0_stateless/01144_multiple_joins_rewriter_v2_and_lambdas.sql @@ -1,5 +1,3 @@ -set multiple_joins_rewriter_version = 2; - select arrayMap(x, y -> floor((y - x) / x, 3), l, r) diff_percent, test, query diff --git a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.reference b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.reference new file mode 100644 index 00000000000..d217855586b --- /dev/null +++ b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.reference @@ -0,0 +1,4 @@ +CREATE TABLE default.rmt\n(\n `n` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test_01148/{shard}/default/rmt\', \'{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rmt1\n(\n `n` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test_01148/{shard}/default/rmt\', \'{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rmt\n(\n `n` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'{default_path_test}test_01148\', \'{default_name_test}\')\nORDER BY n\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rmt\n(\n `n` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'{default_path_test}test_01148\', \'{default_name_test}\')\nORDER BY n\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql new file mode 100644 index 00000000000..e13b25d1706 --- /dev/null +++ b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS rmt; + +CREATE TABLE rmt (n UInt64, s String) ENGINE = ReplicatedMergeTree('/clickhouse/test_01148/{shard}/{database}/{table}', '{replica}') ORDER BY n; +SHOW CREATE TABLE rmt; +RENAME TABLE rmt TO rmt1; +DETACH TABLE rmt1; +ATTACH TABLE rmt1; +SHOW CREATE TABLE rmt1; + +CREATE TABLE rmt (n UInt64, s String) ENGINE = ReplicatedMergeTree('{default_path_test}{uuid}', '{default_name_test}') ORDER BY n; -- { serverError 62 } +CREATE TABLE rmt (n UInt64, s String) ENGINE = ReplicatedMergeTree('{default_path_test}test_01148', '{default_name_test}') ORDER BY n; +SHOW CREATE TABLE rmt; +RENAME TABLE rmt TO rmt2; -- { serverError 48 } +DETACH TABLE rmt; +ATTACH TABLE rmt; +SHOW CREATE TABLE rmt; + +DROP TABLE rmt; +DROP TABLE rmt1; diff --git a/tests/queries/0_stateless/01323_add_scalars_in_time.reference b/tests/queries/0_stateless/01323_add_scalars_in_time.reference index 49e20d0fea8..408efa7f823 100644 --- a/tests/queries/0_stateless/01323_add_scalars_in_time.reference +++ b/tests/queries/0_stateless/01323_add_scalars_in_time.reference @@ -1,2 +1,5 @@ [0,2,3] id2 [1,2,3] id1 +test [1,2,3,4] +2 fre +3 jhg diff --git a/tests/queries/0_stateless/01323_add_scalars_in_time.sql b/tests/queries/0_stateless/01323_add_scalars_in_time.sql index b49c2ef416e..2d7cf270017 100644 --- a/tests/queries/0_stateless/01323_add_scalars_in_time.sql +++ b/tests/queries/0_stateless/01323_add_scalars_in_time.sql @@ -17,3 +17,55 @@ WHERE id LIKE 'id%' GROUP BY id; DROP TABLE tags; + + +-- https://github.com/ClickHouse/ClickHouse/issues/15294 + +drop table if exists TestTable; + +create table TestTable (column String, start DateTime, end DateTime) engine MergeTree order by start; + +insert into 
TestTable (column, start, end) values('test', toDateTime('2020-07-20 09:00:00'), toDateTime('2020-07-20 20:00:00')),('test1', toDateTime('2020-07-20 09:00:00'), toDateTime('2020-07-20 20:00:00')),('test2', toDateTime('2020-07-20 09:00:00'), toDateTime('2020-07-20 20:00:00')); + +SELECT column, +(SELECT d from (select [1, 2, 3, 4] as d)) as d +FROM TestTable +where column == 'test' +GROUP BY column; + +drop table TestTable; + +-- https://github.com/ClickHouse/ClickHouse/issues/11407 + +drop table if exists aaa; +drop table if exists bbb; + +CREATE TABLE aaa ( + id UInt16, + data String +) +ENGINE = MergeTree() +PARTITION BY tuple() +ORDER BY id; + +INSERT INTO aaa VALUES (1, 'sef'),(2, 'fre'),(3, 'jhg'); + +CREATE TABLE bbb ( + id UInt16, + data String +) +ENGINE = MergeTree() +PARTITION BY tuple() +ORDER BY id; + +INSERT INTO bbb VALUES (2, 'fre'), (3, 'jhg'); + +with (select groupArray(id) from bbb) as ids +select * + from aaa + where has(ids, id) +order by id; + + +drop table aaa; +drop table bbb; diff --git a/tests/queries/0_stateless/01380_coded_delta_exception_code.sql b/tests/queries/0_stateless/01380_coded_delta_exception_code.sql index fc679e30e3f..587fac958cd 100644 --- a/tests/queries/0_stateless/01380_coded_delta_exception_code.sql +++ b/tests/queries/0_stateless/01380_coded_delta_exception_code.sql @@ -1,7 +1,3 @@ -CREATE TABLE delta_codec_synthetic (`id` UInt64 NULL CODEC(Delta, ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE delta_codec_synthetic (`id` UInt64 NULL CODEC(DoubleDelta, ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE delta_codec_synthetic (`id` UInt64 NULL CODEC(Gorilla, ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } - CREATE TABLE delta_codec_synthetic (`id` Decimal(38, 10) CODEC(Delta, ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } CREATE TABLE delta_codec_synthetic (`id` Decimal(38, 10) CODEC(DoubleDelta, ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } CREATE TABLE delta_codec_synthetic (`id` Decimal(38, 10) CODEC(Gorilla, ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01460_line_as_string_format.reference b/tests/queries/0_stateless/01460_line_as_string_format.reference index c795fba4309..c72d3e1f15c 100644 --- a/tests/queries/0_stateless/01460_line_as_string_format.reference +++ b/tests/queries/0_stateless/01460_line_as_string_format.reference @@ -6,3 +6,5 @@ Finally implement this new feature. 
42 ClickHouse 42 ClickHouse is a `fast` #open-source# (OLAP) database "management" :system: +1000000 +1000 diff --git a/tests/queries/0_stateless/01460_line_as_string_format.sh b/tests/queries/0_stateless/01460_line_as_string_format.sh index 60a960d2bf8..4f94111df08 100755 --- a/tests/queries/0_stateless/01460_line_as_string_format.sh +++ b/tests/queries/0_stateless/01460_line_as_string_format.sh @@ -29,3 +29,15 @@ echo 'ClickHouse is a `fast` #open-source# (OLAP) 'database' "management" :syste $CLICKHOUSE_CLIENT --query="SELECT * FROM line_as_string2 order by c"; $CLICKHOUSE_CLIENT --query="DROP TABLE line_as_string2" + +$CLICKHOUSE_CLIENT --query="select repeat('aaa',50) from numbers(1000000)" > "${CLICKHOUSE_TMP}"/data1 +$CLICKHOUSE_CLIENT --query="CREATE TABLE line_as_string3(field String) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string3 FORMAT LineAsString" < "${CLICKHOUSE_TMP}"/data1 +$CLICKHOUSE_CLIENT --query="SELECT count(*) FROM line_as_string3"; +$CLICKHOUSE_CLIENT --query="DROP TABLE line_as_string3" + +$CLICKHOUSE_CLIENT --query="select randomString(50000) FROM numbers(1000)" > "${CLICKHOUSE_TMP}"/data2 +$CLICKHOUSE_CLIENT --query="CREATE TABLE line_as_string4(field String) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string4 FORMAT LineAsString" < "${CLICKHOUSE_TMP}"/data2 +$CLICKHOUSE_CLIENT --query="SELECT count(*) FROM line_as_string4"; +$CLICKHOUSE_CLIENT --query="DROP TABLE line_as_string4" diff --git a/tests/queries/0_stateless/01473_event_time_microseconds.reference b/tests/queries/0_stateless/01473_event_time_microseconds.reference index 8aa31f9ab6a..b5533c11268 100644 --- a/tests/queries/0_stateless/01473_event_time_microseconds.reference +++ b/tests/queries/0_stateless/01473_event_time_microseconds.reference @@ -1,2 +1,10 @@ 01473_metric_log_table_event_start_time_microseconds_test ok +01473_trace_log_table_event_start_time_microseconds_test +ok +01473_query_log_table_event_start_time_microseconds_test +ok +01473_query_thread_log_table_event_start_time_microseconds_test +ok +01473_text_log_table_event_start_time_microseconds_test +ok diff --git a/tests/queries/0_stateless/01473_event_time_microseconds.sql b/tests/queries/0_stateless/01473_event_time_microseconds.sql index 138df77ffec..2e536bba7ac 100644 --- a/tests/queries/0_stateless/01473_event_time_microseconds.sql +++ b/tests/queries/0_stateless/01473_event_time_microseconds.sql @@ -3,22 +3,55 @@ -- an integration test as those metrics take 60s by default to be updated. -- Refer: tests/integration/test_asynchronous_metric_log_table. -set log_queries = 1; +SET log_queries = 1; +SET query_profiler_real_time_period_ns = 100000000; +-- a long enough query to trigger the query profiler and to record trace log +SELECT sleep(2) FORMAT Null; +SET query_profiler_real_time_period_ns = 1000000000; +SYSTEM FLUSH LOGS; -select '01473_metric_log_table_event_start_time_microseconds_test'; -system flush logs; +SELECT '01473_metric_log_table_event_start_time_microseconds_test'; -- query assumes that the event_time field is accurate. 
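-- Each check below takes the newest (event_time_microseconds, event_time) pair
-- from the corresponding system log table and prints 'ok' when
-- dateDiff('second', toDateTime(time.1), toDateTime(time.2)) = 0;
-- otherwise the mismatching tuple itself is printed to make failures easier to read.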
WITH ( - ( - SELECT event_time_microseconds + SELECT event_time_microseconds, event_time FROM system.metric_log ORDER BY event_time DESC LIMIT 1 - ) AS time_with_microseconds, - ( - SELECT event_time - FROM system.metric_log + ) AS time +SELECT if(dateDiff('second', toDateTime(time.1), toDateTime(time.2)) = 0, 'ok', toString(time)); + +SELECT '01473_trace_log_table_event_start_time_microseconds_test'; +WITH ( + SELECT event_time_microseconds, event_time + FROM system.trace_log + ORDER BY event_time DESC + LIMIT 1 + ) AS time +SELECT if(dateDiff('second', toDateTime(time.1), toDateTime(time.2)) = 0, 'ok', toString(time)); + +SELECT '01473_query_log_table_event_start_time_microseconds_test'; +WITH ( + SELECT event_time_microseconds, event_time + FROM system.query_log ORDER BY event_time DESC LIMIT 1 - ) AS time) -SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail') + ) AS time +SELECT if(dateDiff('second', toDateTime(time.1), toDateTime(time.2)) = 0, 'ok', toString(time)); + +SELECT '01473_query_thread_log_table_event_start_time_microseconds_test'; +WITH ( + SELECT event_time_microseconds, event_time + FROM system.query_thread_log + ORDER BY event_time DESC + LIMIT 1 + ) AS time +SELECT if(dateDiff('second', toDateTime(time.1), toDateTime(time.2)) = 0, 'ok', toString(time)); + +SELECT '01473_text_log_table_event_start_time_microseconds_test'; +WITH ( + SELECT event_time_microseconds, event_time + FROM system.query_thread_log + ORDER BY event_time DESC + LIMIT 1 + ) AS time +SELECT if(dateDiff('second', toDateTime(time.1), toDateTime(time.2)) = 0, 'ok', toString(time)); diff --git a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference new file mode 100644 index 00000000000..b2c3ea56b7f --- /dev/null +++ b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.reference @@ -0,0 +1,7 @@ +0 1 +0 1 +0 1 +0 1 +1 +0 1 +0 1 diff --git a/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql new file mode 100644 index 00000000000..a6678ca9040 --- /dev/null +++ b/tests/queries/0_stateless/01483_merge_table_join_and_group_by.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +DROP TABLE IF EXISTS m; + +CREATE TABLE a (key UInt32) ENGINE = MergeTree ORDER BY key; +CREATE TABLE b (key UInt32, ID UInt32) ENGINE = MergeTree ORDER BY key; +CREATE TABLE m (key UInt32) ENGINE = Merge(currentDatabase(), 'a'); + +INSERT INTO a VALUES (0); +INSERT INTO b VALUES (0, 1); + +SELECT * FROM m INNER JOIN b USING(key); +SELECT * FROM a INNER JOIN b USING(key) GROUP BY ID, key; +SELECT * FROM m INNER JOIN b USING(key) WHERE ID = 1; +SELECT * FROM m INNER JOIN b USING(key) GROUP BY ID, key; +SELECT ID FROM m INNER JOIN b USING(key) GROUP BY ID; +SELECT * FROM m INNER JOIN b USING(key) WHERE ID = 1 HAVING ID = 1 ORDER BY ID; +SELECT * FROM m INNER JOIN b USING(key) WHERE ID = 1 GROUP BY ID, key HAVING ID = 1 ORDER BY ID; + +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +DROP TABLE IF EXISTS m; diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.reference b/tests/queries/0_stateless/01485_256_bit_multiply.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/01485_256_bit_multiply.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.sql 
b/tests/queries/0_stateless/01485_256_bit_multiply.sql new file mode 100644 index 00000000000..f3f6cc05058 --- /dev/null +++ b/tests/queries/0_stateless/01485_256_bit_multiply.sql @@ -0,0 +1,4 @@ +select count() from +( + select toInt128(number) * number x, toInt256(number) * number y from numbers_mt(100000000) where x != y +); diff --git a/tests/queries/0_stateless/01493_storage_set_persistency.reference b/tests/queries/0_stateless/01493_storage_set_persistency.reference new file mode 100644 index 00000000000..c721770a22a --- /dev/null +++ b/tests/queries/0_stateless/01493_storage_set_persistency.reference @@ -0,0 +1,5 @@ +----- Default Settings ----- +1 +----- Settings persistent=1 ----- +1 +----- Settings persistent=0 ----- diff --git a/tests/queries/0_stateless/01493_storage_set_persistency.sql b/tests/queries/0_stateless/01493_storage_set_persistency.sql new file mode 100644 index 00000000000..9298a24bf73 --- /dev/null +++ b/tests/queries/0_stateless/01493_storage_set_persistency.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS set; +DROP TABLE IF EXISTS number; + +CREATE TABLE number (number UInt64) ENGINE = Memory(); +INSERT INTO number values (1); + +SELECT '----- Default Settings -----'; +CREATE TABLE set (val UInt64) ENGINE = Set(); +INSERT INTO set VALUES (1); +DETACH TABLE set; +ATTACH TABLE set; +SELECT number FROM number WHERE number IN set LIMIT 1; + +DROP TABLE set; + +SELECT '----- Settings persistent=1 -----'; +CREATE TABLE set (val UInt64) ENGINE = Set() SETTINGS persistent=1; +INSERT INTO set VALUES (1); +DETACH TABLE set; +ATTACH TABLE set; +SELECT number FROM number WHERE number IN set LIMIT 1; + +DROP TABLE set; + +SELECT '----- Settings persistent=0 -----'; +CREATE TABLE set (val UInt64) ENGINE = Set() SETTINGS persistent=0; +INSERT INTO set VALUES (1); +DETACH TABLE set; +ATTACH TABLE set; +SELECT number FROM number WHERE number IN set LIMIT 1; + +DROP TABLE set; +DROP TABLE number; diff --git a/tests/queries/0_stateless/01494_storage_join_persistency.reference b/tests/queries/0_stateless/01494_storage_join_persistency.reference new file mode 100644 index 00000000000..31ba11136ea --- /dev/null +++ b/tests/queries/0_stateless/01494_storage_join_persistency.reference @@ -0,0 +1,5 @@ +----- Default Settings ----- +1 21 +----- Settings persistent=1 ----- +1 21 +----- Settings persistent=0 ----- diff --git a/tests/queries/0_stateless/01494_storage_join_persistency.sql b/tests/queries/0_stateless/01494_storage_join_persistency.sql new file mode 100644 index 00000000000..c362a78dccd --- /dev/null +++ b/tests/queries/0_stateless/01494_storage_join_persistency.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS join; + +SELECT '----- Default Settings -----'; +CREATE TABLE join (k UInt64, s String) ENGINE = Join(ANY, LEFT, k); +INSERT INTO join VALUES (1,21); +DETACH TABLE join; +ATTACH TABLE join; +SELECT * from join; + +DROP TABLE join; + +SELECT '----- Settings persistent=1 -----'; +CREATE TABLE join (k UInt64, s String) ENGINE = Join(ANY, LEFT, k) SETTINGS persistent=1; +INSERT INTO join VALUES (1,21); +DETACH TABLE join; +ATTACH TABLE join; +SELECT * from join; + +DROP TABLE join; + +SELECT '----- Settings persistent=0 -----'; +CREATE TABLE join (k UInt64, s String) ENGINE = Join(ANY, LEFT, k) SETTINGS persistent=0; +INSERT INTO join VALUES (1,21); +DETACH TABLE join; +ATTACH TABLE join; +SELECT * from join; + +DROP TABLE join; diff --git a/tests/queries/0_stateless/01497_now_support_timezone.reference b/tests/queries/0_stateless/01497_now_support_timezone.reference new file mode 
100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01497_now_support_timezone.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01497_now_support_timezone.sql b/tests/queries/0_stateless/01497_now_support_timezone.sql new file mode 100644 index 00000000000..b1e9bad58c5 --- /dev/null +++ b/tests/queries/0_stateless/01497_now_support_timezone.sql @@ -0,0 +1 @@ +SELECT (toHour(now('Asia/Shanghai')) - toHour(now('UTC'))) IN (8, -16); diff --git a/tests/queries/0_stateless/01504_compression_multiple_streams.reference b/tests/queries/0_stateless/01504_compression_multiple_streams.reference new file mode 100644 index 00000000000..4d3aba66526 --- /dev/null +++ b/tests/queries/0_stateless/01504_compression_multiple_streams.reference @@ -0,0 +1,23 @@ +1 1 [[1]] (1,[1]) +1 1 [[1]] (1,[1]) +CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(T64, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 +1 1 [[1]] (1,[1]) +2 2 [[2]] (2,[2]) +CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 +1 1 [[1]] (1,[1]) +2 2 [[2]] (2,[2]) +3 3 [[3]] (3,[3]) +1 1 [[1]] (1,[1]) +1 1 [[1]] (1,[1]) +CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 +1 1 [[1]] (1,[1]) +2 2 [[2]] (2,[2]) +1 1 [[1]] (1,[1]) +2 2 [[2]] (2,[2]) +CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 +1 1 [[1]] (1,[1]) +2 2 [[2]] (2,[2]) +3 3 [[3]] (3,[3]) +1 +2 +3 diff --git a/tests/queries/0_stateless/01504_compression_multiple_streams.sql b/tests/queries/0_stateless/01504_compression_multiple_streams.sql new file mode 100644 index 00000000000..7cdf1b52651 --- /dev/null +++ b/tests/queries/0_stateless/01504_compression_multiple_streams.sql @@ -0,0 +1,116 @@ +DROP TABLE IF EXISTS columns_with_multiple_streams; + +SET mutations_sync = 2; + +CREATE TABLE columns_with_multiple_streams ( + field0 Nullable(Int64) CODEC(Delta(2), LZ4), + field1 Nullable(Int64) CODEC(Delta, LZ4), + field2 Array(Array(Int64)) CODEC(Delta, LZ4), + field3 Tuple(UInt32, Array(UInt64)) CODEC(T64, Default) +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; + +INSERT INTO columns_with_multiple_streams VALUES(1, 1, [[1]], 
tuple(1, [1])); + +SELECT * FROM columns_with_multiple_streams; + +DETACH TABLE columns_with_multiple_streams; +ATTACH TABLE columns_with_multiple_streams; + +SELECT * FROM columns_with_multiple_streams; + +ALTER TABLE columns_with_multiple_streams MODIFY COLUMN field1 Nullable(UInt8); + +INSERT INTO columns_with_multiple_streams VALUES(2, 2, [[2]], tuple(2, [2])); + +SHOW CREATE TABLE columns_with_multiple_streams; + +SELECT * FROM columns_with_multiple_streams ORDER BY field0; + +ALTER TABLE columns_with_multiple_streams MODIFY COLUMN field3 CODEC(Delta, Default); + +SHOW CREATE TABLE columns_with_multiple_streams; + +INSERT INTO columns_with_multiple_streams VALUES(3, 3, [[3]], tuple(3, [3])); + +OPTIMIZE TABLE columns_with_multiple_streams FINAL; + +SELECT * FROM columns_with_multiple_streams ORDER BY field0; + +DROP TABLE IF EXISTS columns_with_multiple_streams; + +DROP TABLE IF EXISTS columns_with_multiple_streams_compact; + +CREATE TABLE columns_with_multiple_streams_compact ( + field0 Nullable(Int64) CODEC(Delta(2), LZ4), + field1 Nullable(Int64) CODEC(Delta, LZ4), + field2 Array(Array(Int64)) CODEC(Delta, LZ4), + field3 Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default) +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000; + +INSERT INTO columns_with_multiple_streams_compact VALUES(1, 1, [[1]], tuple(1, [1])); + +SELECT * FROM columns_with_multiple_streams_compact; + +DETACH TABLE columns_with_multiple_streams_compact; +ATTACH TABLE columns_with_multiple_streams_compact; + +SELECT * FROM columns_with_multiple_streams_compact; + +ALTER TABLE columns_with_multiple_streams_compact MODIFY COLUMN field1 Nullable(UInt8); + +INSERT INTO columns_with_multiple_streams_compact VALUES(2, 2, [[2]], tuple(2, [2])); + +SHOW CREATE TABLE columns_with_multiple_streams_compact; + +SELECT * FROM columns_with_multiple_streams_compact ORDER BY field0; + +ALTER TABLE columns_with_multiple_streams_compact MODIFY COLUMN field3 CODEC(Delta, Default); + +SELECT * FROM columns_with_multiple_streams_compact ORDER BY field0; + +SHOW CREATE TABLE columns_with_multiple_streams_compact; + +INSERT INTO columns_with_multiple_streams_compact VALUES(3, 3, [[3]], tuple(3, [3])); + +SELECT * FROM columns_with_multiple_streams_compact ORDER BY field0; + +DROP TABLE IF EXISTS columns_with_multiple_streams_compact; + +DROP TABLE IF EXISTS columns_with_multiple_streams_bad_case; + +-- validation still works, non-sense codecs checked +CREATE TABLE columns_with_multiple_streams_bad_case ( + field0 Nullable(String) CODEC(Delta, LZ4) +) +ENGINE = MergeTree +ORDER BY tuple(); --{serverError 36} + +CREATE TABLE columns_with_multiple_streams_bad_case ( + field0 Tuple(Array(UInt64), String) CODEC(T64, LZ4) +) +ENGINE = MergeTree +ORDER BY tuple(); --{serverError 431} + +SET allow_suspicious_codecs = 1; + +CREATE TABLE columns_with_multiple_streams_bad_case ( + field0 Nullable(UInt64) CODEC(Delta) +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO columns_with_multiple_streams_bad_case VALUES(1), (2); + +INSERT INTO columns_with_multiple_streams_bad_case VALUES(3); + +OPTIMIZE TABLE columns_with_multiple_streams_bad_case FINAL; + +SELECT * FROM columns_with_multiple_streams_bad_case ORDER BY field0; + +DROP TABLE IF EXISTS columns_with_multiple_streams_bad_case; diff --git a/tests/queries/0_stateless/01505_pipeline_executor_UAF.reference b/tests/queries/0_stateless/01505_pipeline_executor_UAF.reference new file mode 100644 index 00000000000..e69de29bb2d diff 
--git a/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh b/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh new file mode 100755 index 00000000000..283e6662a43 --- /dev/null +++ b/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +# Regression for UAF in ThreadPool. +# (Triggered under TSAN) +for i in {1..10}; do + ${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null' + # Binding to specific CPU is not required, but this makes the test more reliable. + taskset --cpu-list 0 ${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null' +done diff --git a/tests/queries/0_stateless/01508_explain_header.reference b/tests/queries/0_stateless/01508_explain_header.reference new file mode 100644 index 00000000000..50216432e14 --- /dev/null +++ b/tests/queries/0_stateless/01508_explain_header.reference @@ -0,0 +1,7 @@ +Expression (Projection) +Header: x UInt8 + Expression (Before ORDER BY and SELECT) + Header: _dummy UInt8 + 1 UInt8 + ReadFromStorage (Read from SystemOne) + Header: dummy UInt8 diff --git a/tests/queries/0_stateless/01508_explain_header.sql b/tests/queries/0_stateless/01508_explain_header.sql new file mode 100644 index 00000000000..fb274c84d8a --- /dev/null +++ b/tests/queries/0_stateless/01508_explain_header.sql @@ -0,0 +1 @@ +explain header = 1 select 1 as x; diff --git a/tests/queries/0_stateless/01508_format_regexp_raw.reference b/tests/queries/0_stateless/01508_format_regexp_raw.reference new file mode 100644 index 00000000000..f0e7d928857 --- /dev/null +++ b/tests/queries/0_stateless/01508_format_regexp_raw.reference @@ -0,0 +1 @@ +abc\\ Hello, world! diff --git a/tests/queries/0_stateless/01508_format_regexp_raw.sh b/tests/queries/0_stateless/01508_format_regexp_raw.sh new file mode 100755 index 00000000000..699fca1be61 --- /dev/null +++ b/tests/queries/0_stateless/01508_format_regexp_raw.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -n --query " +DROP TABLE IF EXISTS t; +CREATE TABLE t (a String, b String) ENGINE = Memory; +" + +${CLICKHOUSE_CLIENT} --format_regexp_escaping_rule 'Raw' --format_regexp '^(.+?) separator (.+?)$' --query ' +INSERT INTO t FORMAT Regexp abc\ separator Hello, world!' 
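+# Note: with format_regexp_escaping_rule='Raw' the backslash in the input is taken literally, so the row should parse as a='abc\' and b='Hello, world!' (cf. 01508_format_regexp_raw.reference).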
+ +${CLICKHOUSE_CLIENT} -n --query " +SELECT * FROM t; +DROP TABLE t; +" diff --git a/tests/queries/0_stateless/01508_partition_pruning.queries b/tests/queries/0_stateless/01508_partition_pruning.queries new file mode 100644 index 00000000000..3773e907c53 --- /dev/null +++ b/tests/queries/0_stateless/01508_partition_pruning.queries @@ -0,0 +1,124 @@ +DROP TABLE IF EXISTS tMM; +DROP TABLE IF EXISTS tDD; +DROP TABLE IF EXISTS sDD; +DROP TABLE IF EXISTS xMM; +CREATE TABLE tMM(d DateTime,a Int64) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY tuple() SETTINGS index_granularity = 8192; +SYSTEM STOP MERGES tMM; +INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-10-01 00:00:00') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-10-15 00:00:00') + number*60, number FROM numbers(5000); + +CREATE TABLE tDD(d DateTime,a Int) ENGINE = MergeTree PARTITION BY toYYYYMMDD(d) ORDER BY tuple() SETTINGS index_granularity = 8192; +SYSTEM STOP MERGES tDD; +insert into tDD select toDateTime(toDate('2020-09-23')), number from numbers(10000) UNION ALL select toDateTime(toDateTime('2020-09-23 11:00:00')), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-09-24')), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-09-25')), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-08-15')), number from numbers(10000); + +CREATE TABLE sDD(d UInt64,a Int) ENGINE = MergeTree PARTITION BY toYYYYMM(toDate(intDiv(d,1000))) ORDER BY tuple() SETTINGS index_granularity = 8192; +SYSTEM STOP MERGES sDD; +insert into sDD select (1597536000+number*60)*1000, number from numbers(5000); +insert into sDD select (1597536000+number*60)*1000, number from numbers(5000); +insert into sDD select (1598918400+number*60)*1000, number from numbers(5000); +insert into sDD select (1598918400+number*60)*1000, number from numbers(5000); +insert into sDD select (1601510400+number*60)*1000, number from numbers(5000); +insert into sDD select (1602720000+number*60)*1000, number from numbers(5000); + +CREATE TABLE xMM(d DateTime,a Int64, f Int64) ENGINE = MergeTree PARTITION BY (toYYYYMM(d), a) ORDER BY tuple() SETTINGS index_granularity = 8192; +SYSTEM STOP MERGES xMM; +INSERT INTO xMM SELECT toDateTime('2020-08-16 00:00:00') + number*60, 1, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-08-16 00:00:00') + number*60, 2, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-09-01 00:00:00') + number*60, 3, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-09-01 00:00:00') + number*60, 2, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-10-01 00:00:00') + number*60, 1, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-10-15 00:00:00') + number*60, 1, number FROM numbers(5000); + + +SELECT '--------- tMM ----------------------------'; +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15'); +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-01'); +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); +select uniqExact(_part), count() 
from tMM where toDate(d)='2020-09-15'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; +select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; +select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; +select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00'); +select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00'); +select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; + +SYSTEM START MERGES tMM; +OPTIMIZE TABLE tMM FINAL; + +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; + + +SELECT '--------- tDD ----------------------------'; +SYSTEM START MERGES tDD; +OPTIMIZE TABLE tDD FINAL; + +select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; +select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= '2020-09-23' and toDate(d) <= '2020-09-26'; +select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; + + +SELECT '--------- sDD ----------------------------'; +select uniqExact(_part), count() from sDD; +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; 
+select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); +select uniqExact(_part), count() from sDD where d >= 1598918400000; +select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; + + +SELECT '--------- xMM ----------------------------'; +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00'; +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; +select uniqExact(_part), count() from xMM where a = 1; +select uniqExact(_part), count() from xMM where a = 66; +select uniqExact(_part), count() from xMM where a <> 66; +select uniqExact(_part), count() from xMM where a = 2; + +SYSTEM START MERGES xMM; +optimize table xMM final; + +select uniqExact(_part), count() from xMM where a = 1; +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +select uniqExact(_part), count() from xMM where a <> 66; +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; + +DROP TABLE tMM; +DROP TABLE tDD; +DROP TABLE sDD; +DROP TABLE xMM; + + diff --git a/tests/queries/0_stateless/01508_partition_pruning.reference b/tests/queries/0_stateless/01508_partition_pruning.reference new file mode 100644 index 00000000000..0cc40d23b41 --- /dev/null +++ b/tests/queries/0_stateless/01508_partition_pruning.reference @@ -0,0 +1,244 @@ +--------- tMM ---------------------------- +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15'); +0 0 +Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-01'); +2 2880 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); +1 1440 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15'; +0 0 +Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; +2 10000 +Selected 2 parts by partition key, 
2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; +2 2880 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; +1 1440 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; +1 1440 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; +3 15000 +Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00'); +6 30000 +Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00'); +0 0 +Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; +2 6440 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; +2 10000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; +2 2880 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; +1 1440 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; +2 6440 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; +4 20000 +Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; +2 10000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 00:00:00'; +1 1440 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +3 11440 +Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +1 1440 
+Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +3 11440 +Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; +1 1440 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +2 10000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +3 9999 +Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; +2 10000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; +4 20000 +Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; +2 10000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +2 9999 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +1 10000 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; +2 20000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +--------- tDD ---------------------------- +select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); +1 10000 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); +1 10000 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; +1 10000 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= '2020-09-23' and toDate(d) <= '2020-09-26'; +3 40000 +Selected 3 parts by partition key, 3 parts by primary key, 4 marks by primary key, 4 marks to read from 3 ranges + +select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; +3 40000 +Selected 3 parts by partition key, 3 parts by primary key, 4 marks by primary key, 4 marks to read from 3 ranges + +--------- sDD ---------------------------- +select uniqExact(_part), count() from sDD; +6 30000 +Selected 6 parts by partition key, 6 parts by primary key, 6 marks 
by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; +3 9999 +Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; +2 9999 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; +0 0 +Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); +3 11440 +Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); +2 10000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from sDD where d >= 1598918400000; +4 20000 +Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; +3 10001 +Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges + +--------- xMM ---------------------------- +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +2 10000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00'; +3 10001 +Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; +2 10000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; +1 1 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +2 5001 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +1 5000 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; +2 10000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where a = 1; +3 
15000 +Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from xMM where a = 66; +0 0 +Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from xMM where a <> 66; +6 30000 +Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from xMM where a = 2; +2 10000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where a = 1; +2 15000 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +1 10000 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where a <> 66; +5 30000 +Selected 5 parts by partition key, 5 parts by primary key, 5 marks by primary key, 5 marks to read from 5 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +2 5001 +Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +1 5000 +Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges + diff --git a/tests/queries/0_stateless/01508_partition_pruning.sh b/tests/queries/0_stateless/01508_partition_pruning.sh new file mode 100755 index 00000000000..c886946c7d9 --- /dev/null +++ b/tests/queries/0_stateless/01508_partition_pruning.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +#------------------------------------------------------------------------------------------- +# Description of the test: +# Tests the correctness of partition pruning. +# +# The script executes queries from the file 01508_partition_pruning.queries (1 line = 1 query). +# Queries that start with 'select' (but NOT with 'SELECT') are additionally executed with send_logs_level=debug. +#------------------------------------------------------------------------------------------- + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +#export CLICKHOUSE_CLIENT="clickhouse-client --send_logs_level=none" +#export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none +#export CURDIR=. + + +queries="${CURDIR}/01508_partition_pruning.queries" +while IFS= read -r sql +do + [ -z "$sql" ] && continue + if [[ "$sql" == select* ]] ; + then + echo "$sql" + ${CLICKHOUSE_CLIENT} --query "$sql" + CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') + ${CLICKHOUSE_CLIENT} --query "$sql" 2>&1 | grep -oh "Selected .* parts by partition key, .* 
parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$" + CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/--send_logs_level=debug/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/g') + echo "" + else + ${CLICKHOUSE_CLIENT} --query "$sql" + fi +done < "$queries" + + diff --git a/tests/queries/0_stateless/01508_query_obfuscator.reference b/tests/queries/0_stateless/01508_query_obfuscator.reference new file mode 100644 index 00000000000..0064ac73a09 --- /dev/null +++ b/tests/queries/0_stateless/01508_query_obfuscator.reference @@ -0,0 +1,16 @@ +SELECT 116, 'Qqfu://2020-02-10isqkc1203 sp 2000-05-27T18:38:01', 13e100, Residue_id_breakfastDevice, park(Innervation), avgIf(remote('128.0.0.1')) +SELECT shell_dust_tintype between crumb and shoat, case when peach >= 116 then bombing else null end + +SELECT + ChimeID, + Testimonial.ID, Testimonial.SipCauseway, + TankfulTRUMPET, + HUMIDITY.TermiteName, HUMIDITY.TermiteSculptural, HUMIDITY.TermiteGuilt, HUMIDITY.TermiteIntensity, HUMIDITY.SipCauseway, HUMIDITY.Coat +FROM merge.tinkle_efficiency +WHERE + FaithSeller >= '2020-10-13' AND FaithSeller <= '2020-10-21' + AND MandolinID = 30750384 + AND intHash32(GafferID) = 448362928 AND intHash64(GafferID) = 12572659331310383983 + AND ChimeID IN (8195672321757027078, 7079643623150622129, 5057006826979676478, 7886875230160484653, 7494974311229040743) + AND Stot = 1 + diff --git a/tests/queries/0_stateless/01508_query_obfuscator.sh b/tests/queries/0_stateless/01508_query_obfuscator.sh new file mode 100755 index 00000000000..d60e42489fa --- /dev/null +++ b/tests/queries/0_stateless/01508_query_obfuscator.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_FORMAT --seed Hello --obfuscate <<< "SELECT 123, 'Test://2020-01-01hello1234 at 2000-01-01T01:02:03', 12e100, Gibberish_id_testCool, hello(World), avgIf(remote('127.0.0.1'))" +$CLICKHOUSE_FORMAT --seed Hello --obfuscate <<< "SELECT cost_first_screen between a and b, case when x >= 123 then y else null end" + +$CLICKHOUSE_FORMAT --seed Hello --obfuscate <<< " +SELECT + VisitID, + Goals.ID, Goals.EventTime, + WatchIDs, + EAction.ProductName, EAction.ProductPrice, EAction.ProductCurrency, EAction.ProductQuantity, EAction.EventTime, EAction.Type +FROM merge.visits_v2 +WHERE + StartDate >= '2020-09-17' AND StartDate <= '2020-09-25' + AND CounterID = 24226447 + AND intHash32(UserID) = 416638616 AND intHash64(UserID) = 13269091395366875299 + AND VisitID IN (5653048135597886819, 5556254872710352304, 5516214175671455313, 5476714937521999313, 5464051549483503043) + AND Sign = 1 +" diff --git a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper.reference b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper.reference new file mode 100644 index 00000000000..13de30f45d1 --- /dev/null +++ b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper.reference @@ -0,0 +1 @@ +3000 diff --git a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper.sh b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper.sh new file mode 100755 index 00000000000..2af1cb214a4 --- /dev/null +++ b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS table_for_renames0" +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS table_for_renames50" + + +$CLICKHOUSE_CLIENT --query "CREATE TABLE table_for_renames0 (value UInt64, data String) ENGINE ReplicatedMergeTree('/clickhouse/tables/test_01508/concurrent_rename', '1') ORDER BY tuple() SETTINGS cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, min_rows_for_compact_part = 100000, min_rows_for_compact_part = 10000000, write_ahead_log_max_bytes = 1" + + +$CLICKHOUSE_CLIENT --query "INSERT INTO table_for_renames0 SELECT number, toString(number) FROM numbers(1000)" + +$CLICKHOUSE_CLIENT --query "INSERT INTO table_for_renames0 SELECT number, toString(number) FROM numbers(1000, 1000)" + +$CLICKHOUSE_CLIENT --query "INSERT INTO table_for_renames0 SELECT number, toString(number) FROM numbers(2000, 1000)" + +for i in $(seq 1 50); do + prev_i=$((i - 1)) + $CLICKHOUSE_CLIENT --query "RENAME TABLE table_for_renames$prev_i TO table_for_renames$i" +done + +$CLICKHOUSE_CLIENT --query "SELECT COUNT() from table_for_renames50" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS table_for_renames0" +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS table_for_renames50" diff --git a/tests/queries/0_stateless/01509_format_raw_blob.reference b/tests/queries/0_stateless/01509_format_raw_blob.reference new file mode 100644 index 00000000000..dfa8f538e67 --- /dev/null +++ b/tests/queries/0_stateless/01509_format_raw_blob.reference @@ -0,0 +1,2 @@ +96b229180107fd2d23fd0a2ef9326701 - +96b229180107fd2d23fd0a2ef9326701 - diff --git a/tests/queries/0_stateless/01509_format_raw_blob.sh b/tests/queries/0_stateless/01509_format_raw_blob.sh new file mode 100755 index 00000000000..68d3844d727 --- /dev/null +++ b/tests/queries/0_stateless/01509_format_raw_blob.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -n --query " +DROP TABLE IF EXISTS t; +CREATE TABLE t (a LowCardinality(Nullable(String))) ENGINE = Memory; +" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT RawBLOB" < ${BASH_SOURCE[0]} + +cat ${BASH_SOURCE[0]} | md5sum +${CLICKHOUSE_CLIENT} -n --query "SELECT * FROM t FORMAT RawBLOB" | md5sum + +${CLICKHOUSE_CLIENT} --query " +DROP TABLE t; +" diff --git a/tests/queries/0_stateless/01510_format_regexp_raw_low_cardinality.reference b/tests/queries/0_stateless/01510_format_regexp_raw_low_cardinality.reference new file mode 100644 index 00000000000..f0e7d928857 --- /dev/null +++ b/tests/queries/0_stateless/01510_format_regexp_raw_low_cardinality.reference @@ -0,0 +1 @@ +abc\\ Hello, world! diff --git a/tests/queries/0_stateless/01510_format_regexp_raw_low_cardinality.sh b/tests/queries/0_stateless/01510_format_regexp_raw_low_cardinality.sh new file mode 100755 index 00000000000..0f65280e1ce --- /dev/null +++ b/tests/queries/0_stateless/01510_format_regexp_raw_low_cardinality.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -n --query " +DROP TABLE IF EXISTS t; +CREATE TABLE t (a String, b LowCardinality(Nullable(String))) ENGINE = Memory; +" + +${CLICKHOUSE_CLIENT} --format_regexp_escaping_rule 'Raw' --format_regexp '^(.+?) separator (.+?)$' --query ' +INSERT INTO t FORMAT Regexp abc\ separator Hello, world!' 
+ +${CLICKHOUSE_CLIENT} -n --query " +SELECT * FROM t; +DROP TABLE t; +" diff --git a/tests/queries/0_stateless/01512_create_replicate_merge_tree_one_arg.reference b/tests/queries/0_stateless/01512_create_replicate_merge_tree_one_arg.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01512_create_replicate_merge_tree_one_arg.sql b/tests/queries/0_stateless/01512_create_replicate_merge_tree_one_arg.sql new file mode 100644 index 00000000000..f33c1534a44 --- /dev/null +++ b/tests/queries/0_stateless/01512_create_replicate_merge_tree_one_arg.sql @@ -0,0 +1,3 @@ +CREATE TABLE mt (v UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01497/mt') + ORDER BY tuple() -- { serverError 36 } + diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index e93e617be31..f7299d906e1 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -17,6 +17,7 @@ "00151_replace_partition_with_different_granularity", "00157_cache_dictionary", "01193_metadata_loading", + "01473_event_time_microseconds", "01474_executable_dictionary" /// informational stderr from sanitizer at start ], "address-sanitizer": [ @@ -25,6 +26,7 @@ "memory_profiler", "odbc_roundtrip", "01103_check_cpu_instructions_at_startup", + "01473_event_time_microseconds", "01193_metadata_loading" ], "ub-sanitizer": [ @@ -33,6 +35,7 @@ "memory_profiler", "01103_check_cpu_instructions_at_startup", "00900_orc_load", + "01473_event_time_microseconds", "01193_metadata_loading" ], "memory-sanitizer": [ @@ -43,6 +46,7 @@ "01086_odbc_roundtrip", /// can't pass because odbc libraries are not instrumented "00877_memory_limit_for_new_delete", /// memory limits don't work correctly under msan because it replaces malloc/free "01114_mysql_database_engine_segfault", /// it fails in _nss_files_parse_servent while using NSS from GLibc to authenticate (need to get rid of it) + "01473_event_time_microseconds", "01193_metadata_loading" ], "debug-build": [ @@ -57,6 +61,7 @@ "01037_polygon_dicts_", "hyperscan", "01193_metadata_loading", + "01473_event_time_microseconds", "01396_inactive_replica_cleanup_nodes" ], "unbundled-build": [ diff --git a/tests/tsan_suppressions.txt b/tests/tsan_suppressions.txt index 912e0361bff..ccc36d876f7 100644 --- a/tests/tsan_suppressions.txt +++ b/tests/tsan_suppressions.txt @@ -1 +1,4 @@ -# Fortunately, we have no suppressions! 
+# Looks like a bug in the clang-11 thread sanitizer: it detects a normal (benign) data race with a random FD in this method +race:DB::LazyPipeFDs::close +# Data races in OpenSSL: https://github.com/openssl/openssl/issues/11974 +fun:evp_cipher_cache_constants diff --git a/utils/check-style/check-style index f745175a07f..3d2d99745e3 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -76,6 +76,9 @@ CURDIR=$(dirname "${BASH_SOURCE[0]}") # Check sh tests with Shellcheck (cd $ROOT_PATH/tests/queries/0_stateless/ && shellcheck --check-sourced --external-sources --severity info --exclude SC1071,SC2086 *.sh ../1_stateful/*.sh) +# Check docker scripts with Shellcheck +find "$ROOT_PATH/docker" -executable -type f -exec file -F' ' --mime-type {} \; | awk -F' ' '$2==" text/x-shellscript" {print $1}' | xargs shellcheck + # There shouldn't be any docker containers outside docker directory find $ROOT_PATH -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name Dockerfile -type f 2>/dev/null | xargs --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:" diff --git a/utils/db-generator/query_db_generator.cpp b/utils/db-generator/query_db_generator.cpp index 16aa8aa7769..88418e9b6c3 100644 --- a/utils/db-generator/query_db_generator.cpp +++ b/utils/db-generator/query_db_generator.cpp @@ -1,3 +1,4 @@ +#include #include #include #include