diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 4764e6d3c1a..259e6d41110 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -172,7 +172,7 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !cancelled() }} + if: ${{ !failure() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: diff --git a/README.md b/README.md index dc253d4db2d..98f9108f14c 100644 --- a/README.md +++ b/README.md @@ -34,14 +34,12 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.6 Community Call](https://clickhouse.com/company/events/v24-6-community-release-call) - Jul 2 +* [v24.7 Community Call](https://clickhouse.com/company/events/v24-7-community-release-call) - Jul 30 ## Upcoming Events Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. -* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26 -* [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27 * [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 * [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9 * [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9 diff --git a/base/base/demangle.h b/base/base/demangle.h index ddca264ecab..af9ccad16c1 100644 --- a/base/base/demangle.h +++ b/base/base/demangle.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include diff --git a/base/base/extended_types.h b/base/base/extended_types.h index 796167ab45d..3bf3f4ed31d 100644 --- a/base/base/extended_types.h +++ b/base/base/extended_types.h @@ -108,6 +108,14 @@ struct make_unsigned // NOLINT(readability-identifier-naming) using type = std::make_unsigned_t; }; +template <> struct make_unsigned { using type = UInt8; }; +template <> struct make_unsigned { using type = UInt8; }; +template <> struct make_unsigned { using type = UInt16; }; +template <> struct make_unsigned { using type = UInt16; }; +template <> struct make_unsigned { using type = UInt32; }; +template <> struct make_unsigned { using type = UInt32; }; +template <> struct make_unsigned { using type = UInt64; }; +template <> struct make_unsigned { using type = UInt64; }; template <> struct make_unsigned { using type = UInt128; }; template <> struct make_unsigned { using type = UInt128; }; template <> struct make_unsigned { using type = UInt256; }; @@ -121,6 +129,14 @@ struct make_signed // NOLINT(readability-identifier-naming) using type = std::make_signed_t; }; +template <> struct make_signed { using type = Int8; }; +template <> struct make_signed { using type = Int8; }; +template <> struct make_signed { using type = Int16; }; +template <> struct make_signed { using type = Int16; }; +template <> struct make_signed { using type = Int32; }; +template <> struct make_signed { using type = Int32; }; +template <> struct make_signed { using type = Int64; }; +template <> struct make_signed { using type = Int64; }; template <> struct make_signed { using type = Int128; }; template <> struct make_signed { using type = Int128; }; template <> struct make_signed { using type = Int256; }; diff --git a/base/base/isSharedPtrUnique.h b/base/base/isSharedPtrUnique.h new file mode 100644 index 00000000000..c153605ecb1 --- /dev/null +++ b/base/base/isSharedPtrUnique.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +template +bool isSharedPtrUnique(const std::shared_ptr & ptr) +{ + return ptr.use_count() == 1; +} diff --git a/base/poco/Foundation/include/Poco/Format.h b/base/poco/Foundation/include/Poco/Format.h index f84be16d3ad..4f91dd44ca5 100644 --- a/base/poco/Foundation/include/Poco/Format.h +++ b/base/poco/Foundation/include/Poco/Format.h @@ -232,7 +232,7 @@ void Foundation_API format( const Any & value10); -void Foundation_API format(std::string & result, const std::string & fmt, const std::vector & values); +void Foundation_API formatVector(std::string & result, const std::string & fmt, const std::vector & values); /// Supports a variable number of arguments and is used by /// all other variants of format(). diff --git a/base/poco/Foundation/include/Poco/RefCountedObject.h b/base/poco/Foundation/include/Poco/RefCountedObject.h index db966089e00..d0d964d8390 100644 --- a/base/poco/Foundation/include/Poco/RefCountedObject.h +++ b/base/poco/Foundation/include/Poco/RefCountedObject.h @@ -21,6 +21,8 @@ #include "Poco/AtomicCounter.h" #include "Poco/Foundation.h" +#include + namespace Poco { diff --git a/base/poco/Foundation/src/Format.cpp b/base/poco/Foundation/src/Format.cpp index 9872ddff042..94ab124510d 100644 --- a/base/poco/Foundation/src/Format.cpp +++ b/base/poco/Foundation/src/Format.cpp @@ -51,8 +51,8 @@ namespace } if (width != 0) str.width(width); } - - + + void parsePrec(std::ostream& str, std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt) { if (itFmt != endFmt && *itFmt == '.') @@ -67,7 +67,7 @@ namespace if (prec >= 0) str.precision(prec); } } - + char parseMod(std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt) { char mod = 0; @@ -77,13 +77,13 @@ namespace { case 'l': case 'h': - case 'L': + case 'L': case '?': mod = *itFmt++; break; } } return mod; } - + std::size_t parseIndex(std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt) { int index = 0; @@ -110,8 +110,8 @@ namespace case 'f': str << std::fixed; break; } } - - + + void writeAnyInt(std::ostream& str, const Any& any) { if (any.type() == typeid(char)) @@ -201,7 +201,7 @@ namespace str << RefAnyCast(*itVal++); break; case 'z': - str << AnyCast(*itVal++); + str << AnyCast(*itVal++); break; case 'I': case 'D': @@ -303,7 +303,7 @@ void format(std::string& result, const std::string& fmt, const Any& value) { std::vector args; args.push_back(value); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -312,7 +312,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons std::vector args; args.push_back(value1); args.push_back(value2); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -322,7 +322,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value1); args.push_back(value2); args.push_back(value3); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -333,7 +333,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value2); args.push_back(value3); args.push_back(value4); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -345,7 +345,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value3); args.push_back(value4); args.push_back(value5); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -358,7 +358,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value4); args.push_back(value5); args.push_back(value6); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -372,7 +372,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value5); args.push_back(value6); args.push_back(value7); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -387,7 +387,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value6); args.push_back(value7); args.push_back(value8); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -403,7 +403,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value7); args.push_back(value8); args.push_back(value9); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -420,16 +420,16 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value8); args.push_back(value9); args.push_back(value10); - format(result, fmt, args); + formatVector(result, fmt, args); } -void format(std::string& result, const std::string& fmt, const std::vector& values) +void formatVector(std::string& result, const std::string& fmt, const std::vector& values) { std::string::const_iterator itFmt = fmt.begin(); std::string::const_iterator endFmt = fmt.end(); std::vector::const_iterator itVal = values.begin(); - std::vector::const_iterator endVal = values.end(); + std::vector::const_iterator endVal = values.end(); while (itFmt != endFmt) { switch (*itFmt) diff --git a/base/poco/MongoDB/src/ObjectId.cpp b/base/poco/MongoDB/src/ObjectId.cpp index 0125c246c2d..e360d129843 100644 --- a/base/poco/MongoDB/src/ObjectId.cpp +++ b/base/poco/MongoDB/src/ObjectId.cpp @@ -57,7 +57,7 @@ std::string ObjectId::toString(const std::string& fmt) const for (int i = 0; i < 12; ++i) { - s += format(fmt, (unsigned int) _id[i]); + s += Poco::format(fmt, (unsigned int) _id[i]); } return s; } diff --git a/base/poco/MongoDB/src/OpMsgCursor.cpp b/base/poco/MongoDB/src/OpMsgCursor.cpp index bc95851ae33..6abd45ecf76 100644 --- a/base/poco/MongoDB/src/OpMsgCursor.cpp +++ b/base/poco/MongoDB/src/OpMsgCursor.cpp @@ -43,9 +43,9 @@ namespace Poco { namespace MongoDB { -static const std::string keyCursor {"cursor"}; -static const std::string keyFirstBatch {"firstBatch"}; -static const std::string keyNextBatch {"nextBatch"}; +[[ maybe_unused ]] static const std::string keyCursor {"cursor"}; +[[ maybe_unused ]] static const std::string keyFirstBatch {"firstBatch"}; +[[ maybe_unused ]] static const std::string keyNextBatch {"nextBatch"}; static Poco::Int64 cursorIdFromResponse(const MongoDB::Document& doc); @@ -131,7 +131,7 @@ OpMsgMessage& OpMsgCursor::next(Connection& connection) connection.readResponse(_response); } else -#endif +#endif { _response.clear(); _query.setCursor(_cursorID, _batchSize); diff --git a/base/poco/Net/include/Poco/Net/NameValueCollection.h b/base/poco/Net/include/Poco/Net/NameValueCollection.h index be499838d0e..2337535bd11 100644 --- a/base/poco/Net/include/Poco/Net/NameValueCollection.h +++ b/base/poco/Net/include/Poco/Net/NameValueCollection.h @@ -79,7 +79,7 @@ namespace Net /// Returns the value of the first name-value pair with the given name. /// If no value with the given name has been found, the defaultValue is returned. - const std::vector> getAll(const std::string & name) const; + std::vector getAll(const std::string & name) const; /// Returns all values of all name-value pairs with the given name. /// /// Returns an empty vector if there are no name-value pairs with the given name. diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp index c0083ec410c..b7ab5543a85 100644 --- a/base/poco/Net/src/HTTPMessage.cpp +++ b/base/poco/Net/src/HTTPMessage.cpp @@ -17,9 +17,9 @@ #include "Poco/NumberFormatter.h" #include "Poco/NumberParser.h" #include "Poco/String.h" +#include #include - using Poco::NumberFormatter; using Poco::NumberParser; using Poco::icompare; @@ -75,7 +75,7 @@ void HTTPMessage::setContentLength(std::streamsize length) erase(CONTENT_LENGTH); } - + std::streamsize HTTPMessage::getContentLength() const { const std::string& contentLength = get(CONTENT_LENGTH, EMPTY); @@ -98,7 +98,7 @@ void HTTPMessage::setContentLength64(Poco::Int64 length) erase(CONTENT_LENGTH); } - + Poco::Int64 HTTPMessage::getContentLength64() const { const std::string& contentLength = get(CONTENT_LENGTH, EMPTY); @@ -133,13 +133,13 @@ void HTTPMessage::setChunkedTransferEncoding(bool flag) setTransferEncoding(IDENTITY_TRANSFER_ENCODING); } - + bool HTTPMessage::getChunkedTransferEncoding() const { return icompare(getTransferEncoding(), CHUNKED_TRANSFER_ENCODING) == 0; } - + void HTTPMessage::setContentType(const std::string& mediaType) { if (mediaType.empty()) @@ -154,7 +154,7 @@ void HTTPMessage::setContentType(const MediaType& mediaType) setContentType(mediaType.toString()); } - + const std::string& HTTPMessage::getContentType() const { return get(CONTENT_TYPE, UNKNOWN_CONTENT_TYPE); diff --git a/base/poco/Net/src/NameValueCollection.cpp b/base/poco/Net/src/NameValueCollection.cpp index 783ed48cc30..e35d66d3bde 100644 --- a/base/poco/Net/src/NameValueCollection.cpp +++ b/base/poco/Net/src/NameValueCollection.cpp @@ -102,9 +102,9 @@ const std::string& NameValueCollection::get(const std::string& name, const std:: return defaultValue; } -const std::vector> NameValueCollection::getAll(const std::string& name) const +std::vector NameValueCollection::getAll(const std::string& name) const { - std::vector> values; + std::vector values; for (ConstIterator it = _map.find(name); it != _map.end(); it++) if (it->first == name) values.push_back(it->second); diff --git a/cmake/target.cmake b/cmake/target.cmake index d6c497955f6..3d0ecd032f9 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -84,5 +84,5 @@ if (CMAKE_CROSSCOMPILING) message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!") endif () - message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}") + message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILER_TARGET}") endif () diff --git a/contrib/avro b/contrib/avro index d43acc84d3d..545e7002683 160000 --- a/contrib/avro +++ b/contrib/avro @@ -1 +1 @@ -Subproject commit d43acc84d3d455b016f847d6666fbc3cd27f16a9 +Subproject commit 545e7002683cbc2198164d93088ac8e4955b4628 diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt index abde20addaf..250b47b7c2c 100644 --- a/contrib/aws-cmake/CMakeLists.txt +++ b/contrib/aws-cmake/CMakeLists.txt @@ -125,7 +125,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY) aws_get_version(AWS_CRT_CPP_VERSION_MAJOR AWS_CRT_CPP_VERSION_MINOR AWS_CRT_CPP_VERSION_PATCH FULL_VERSION GIT_HASH) -configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${AWS_CRT_DIR}/include/aws/crt/Config.h" @ONLY) +configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/crt/Config.h" @ONLY) list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC}) diff --git a/contrib/azure b/contrib/azure index 92c94d7f37a..ea3e19a7be0 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf +Subproject commit ea3e19a7be08519134c643177d56c7484dfec884 diff --git a/contrib/grpc b/contrib/grpc index 77b2737a709..f5b7fdc2dff 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit 77b2737a709d43d8c6895e3f03ca62b00bd9201c +Subproject commit f5b7fdc2dff09ada06dbf6c75df298fb40f898df diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 38ebcc8f680..1fbfd29a3bd 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -179,12 +179,19 @@ endif () target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1) -# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. -# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. -# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracking. +# jemalloc provides support two unwind flavors: +# - JEMALLOC_PROF_LIBUNWIND - unw_backtrace() - gnu libunwind (compatible with llvm libunwind) +# - JEMALLOC_PROF_LIBGCC - _Unwind_Backtrace() - the original HP libunwind and the one coming with gcc / g++ / libstdc++. # -# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). -target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) +# But for JEMALLOC_PROF_LIBGCC it also calls _Unwind_Backtrace() during +# bootstraping of jemalloc, which may lead to deadlock, if the dlsym will do +# allocations somewhere (like glibc does prio 2.34, see [1]). +# +# [1]: https://sourceware.org/git/?p=glibc.git;a=commit;h=fada9018199c21c469ff0e731ef75c6020074ac9 +# +# And since ClickHouse unwind already supports unw_backtrace() we can safely +# switch to it to avoid this deadlock. +target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBUNWIND=1) target_link_libraries (_jemalloc PRIVATE unwind) # for RTLD_NEXT diff --git a/contrib/libpq-cmake/CMakeLists.txt b/contrib/libpq-cmake/CMakeLists.txt index 6a0012c01bf..246e19593f6 100644 --- a/contrib/libpq-cmake/CMakeLists.txt +++ b/contrib/libpq-cmake/CMakeLists.txt @@ -54,7 +54,6 @@ set(SRCS "${LIBPQ_SOURCE_DIR}/port/pgstrcasecmp.c" "${LIBPQ_SOURCE_DIR}/port/thread.c" "${LIBPQ_SOURCE_DIR}/port/path.c" - "${LIBPQ_SOURCE_DIR}/port/explicit_bzero.c" ) add_library(_libpq ${SRCS}) diff --git a/contrib/orc b/contrib/orc index 947cebaf943..bcc025c0982 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f +Subproject commit bcc025c09828c556f54cfbdf83a66b9acae7d17f diff --git a/contrib/pocketfft b/contrib/pocketfft index 9efd4da52cf..f4c1aa8aa9c 160000 --- a/contrib/pocketfft +++ b/contrib/pocketfft @@ -1 +1 @@ -Subproject commit 9efd4da52cf8d28d14531d14e43ad9d913807546 +Subproject commit f4c1aa8aa9ce79ad39e80f2c9c41b92ead90fda3 diff --git a/contrib/rocksdb b/contrib/rocksdb index 3a0b80ca9d6..be366233921 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 3a0b80ca9d6eebb38fad7ea3f41dfc9db4f6a984 +Subproject commit be366233921293bd07a84dc4ea6991858665f202 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index c4220ba90ac..3a14407166c 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -1,24 +1,17 @@ -option (ENABLE_ROCKSDB "Enable rocksdb library" ${ENABLE_LIBRARIES}) +option (ENABLE_ROCKSDB "Enable RocksDB" ${ENABLE_LIBRARIES}) if (NOT ENABLE_ROCKSDB) - message (STATUS "Not using rocksdb") + message (STATUS "Not using RocksDB") return() endif() -## this file is extracted from `contrib/rocksdb/CMakeLists.txt` -set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") -list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/") - -set(PORTABLE ON) -## always disable jemalloc for rocksdb by default -## because it introduces non-standard jemalloc APIs +# Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs option(WITH_JEMALLOC "build with JeMalloc" OFF) -set(USE_SNAPPY OFF) -if (TARGET ch_contrib::snappy) - set(USE_SNAPPY ON) -endif() -option(WITH_SNAPPY "build with SNAPPY" ${USE_SNAPPY}) -## lz4, zlib, zstd is enabled in ClickHouse by default + +option(WITH_LIBURING "build with liburing" OFF) # TODO could try to enable this conditionally, depending on ClickHouse's ENABLE_LIBURING + +# ClickHouse cannot be compiled without snappy, lz4, zlib, zstd +option(WITH_SNAPPY "build with SNAPPY" ON) option(WITH_LZ4 "build with lz4" ON) option(WITH_ZLIB "build with zlib" ON) option(WITH_ZSTD "build with zstd" ON) @@ -26,78 +19,46 @@ option(WITH_ZSTD "build with zstd" ON) # third-party/folly is only validated to work on Linux and Windows for now. # So only turn it on there by default. if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows") - if(MSVC AND MSVC_VERSION LESS 1910) - # Folly does not compile with MSVC older than VS2017 - option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) - else() - option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) - endif() + option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) else() option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) endif() -if( NOT DEFINED CMAKE_CXX_STANDARD ) - set(CMAKE_CXX_STANDARD 11) +if(WITH_SNAPPY) + add_definitions(-DSNAPPY) + list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) endif() -if(MSVC) - option(WITH_XPRESS "build with windows built in compression" OFF) - include("${ROCKSDB_SOURCE_DIR}/thirdparty.inc") -else() - if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND NOT CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") - # FreeBSD has jemalloc as default malloc - # but it does not have all the jemalloc files in include/... - set(WITH_JEMALLOC ON) - else() - if(WITH_JEMALLOC AND TARGET ch_contrib::jemalloc) - add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE) - list(APPEND THIRDPARTY_LIBS ch_contrib::jemalloc) - endif() - endif() - - if(WITH_SNAPPY) - add_definitions(-DSNAPPY) - list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) - endif() - - if(WITH_ZLIB) - add_definitions(-DZLIB) - list(APPEND THIRDPARTY_LIBS ch_contrib::zlib) - endif() - - if(WITH_LZ4) - add_definitions(-DLZ4) - list(APPEND THIRDPARTY_LIBS ch_contrib::lz4) - endif() - - if(WITH_ZSTD) - add_definitions(-DZSTD) - list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) - endif() +if(WITH_ZLIB) + add_definitions(-DZLIB) + list(APPEND THIRDPARTY_LIBS ch_contrib::zlib) endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") - if(POWER9) - set(HAS_POWER9 1) - set(HAS_ALTIVEC 1) - else() - set(HAS_POWER8 1) - set(HAS_ALTIVEC 1) - endif(POWER9) -endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") +if(WITH_LZ4) + add_definitions(-DLZ4) + list(APPEND THIRDPARTY_LIBS ch_contrib::lz4) +endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64") - set(HAS_ARMV8_CRC 1) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") -endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64") +if(WITH_ZSTD) + add_definitions(-DZSTD) + list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) +endif() +option(PORTABLE "build a portable binary" ON) -if(ENABLE_AVX2 AND ENABLE_PCLMULQDQ) +if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ) add_definitions(-DHAVE_SSE42) add_definitions(-DHAVE_PCLMUL) endif() +if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64") + set (HAS_ARMV8_CRC 1) + # the original build descriptions set specific flags for ARM. These flags are already subsumed by ClickHouse's general + # ARM flags, see cmake/cpu_features.cmake + # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") + # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") +endif() + set (HAVE_THREAD_LOCAL 1) if(HAVE_THREAD_LOCAL) add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) @@ -107,8 +68,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-DOS_MACOSX) elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") add_definitions(-DOS_LINUX) -elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") - add_definitions(-DOS_SOLARIS) elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") add_definitions(-DOS_FREEBSD) elseif(CMAKE_SYSTEM_NAME MATCHES "Android") @@ -123,12 +82,10 @@ endif() if (OS_LINUX) add_definitions(-DROCKSDB_SCHED_GETCPU_PRESENT) - add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT) add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT) -elseif (OS_FREEBSD) - add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT) endif() +set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") include_directories(${ROCKSDB_SOURCE_DIR}) include_directories("${ROCKSDB_SOURCE_DIR}/include") @@ -136,11 +93,11 @@ if(WITH_FOLLY_DISTRIBUTED_MUTEX) include_directories("${ROCKSDB_SOURCE_DIR}/third-party/folly") endif() -# Main library source code - set(SOURCES ${ROCKSDB_SOURCE_DIR}/cache/cache.cc ${ROCKSDB_SOURCE_DIR}/cache/cache_entry_roles.cc + ${ROCKSDB_SOURCE_DIR}/cache/cache_key.cc + ${ROCKSDB_SOURCE_DIR}/cache/cache_reservation_manager.cc ${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc @@ -156,6 +113,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc ${ROCKSDB_SOURCE_DIR}/db/builder.cc ${ROCKSDB_SOURCE_DIR}/db/c.cc ${ROCKSDB_SOURCE_DIR}/db/column_family.cc @@ -229,6 +187,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc ${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc ${ROCKSDB_SOURCE_DIR}/env/mock_env.cc + ${ROCKSDB_SOURCE_DIR}/env/unique_id_gen.cc ${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc ${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc ${ROCKSDB_SOURCE_DIR}/file/file_util.cc @@ -247,6 +206,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/memory/concurrent_arena.cc ${ROCKSDB_SOURCE_DIR}/memory/jemalloc_nodump_allocator.cc ${ROCKSDB_SOURCE_DIR}/memory/memkind_kmem_allocator.cc + ${ROCKSDB_SOURCE_DIR}/memory/memory_allocator.cc ${ROCKSDB_SOURCE_DIR}/memtable/alloc_tracker.cc ${ROCKSDB_SOURCE_DIR}/memtable/hash_linklist_rep.cc ${ROCKSDB_SOURCE_DIR}/memtable/hash_skiplist_rep.cc @@ -322,6 +282,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/table/table_factory.cc ${ROCKSDB_SOURCE_DIR}/table/table_properties.cc ${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/unique_id.cc ${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc ${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc ${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc @@ -333,9 +294,12 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc ${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc ${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc - ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc ${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc ${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_handler.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_result.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc ${ROCKSDB_SOURCE_DIR}/util/coding.cc ${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc ${ROCKSDB_SOURCE_DIR}/util/comparator.cc @@ -347,6 +311,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc ${ROCKSDB_SOURCE_DIR}/util/random.cc ${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc + ${ROCKSDB_SOURCE_DIR}/util/regex.cc ${ROCKSDB_SOURCE_DIR}/util/ribbon_config.cc ${ROCKSDB_SOURCE_DIR}/util/slice.cc ${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc @@ -362,18 +327,23 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc ${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters.cc ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc ${ROCKSDB_SOURCE_DIR}/utilities/debug.cc ${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc ${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc + ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_secondary_cache.cc ${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc ${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/put.cc @@ -393,6 +363,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc ${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc ${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc + ${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_manager.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_tracker.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_manager.cc @@ -411,6 +382,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc ${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc @@ -425,7 +397,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc - rocksdb_build_version.cc) + build_version.cc) # generated by hand if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ) set_source_files_properties( @@ -462,5 +434,6 @@ endif() add_library(_rocksdb ${SOURCES}) add_library(ch_contrib::rocksdb ALIAS _rocksdb) target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) + # SYSTEM is required to overcome some issues target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include") diff --git a/contrib/rocksdb-cmake/rocksdb_build_version.cc b/contrib/rocksdb-cmake/build_version.cc similarity index 100% rename from contrib/rocksdb-cmake/rocksdb_build_version.cc rename to contrib/rocksdb-cmake/build_version.cc diff --git a/contrib/s2geometry b/contrib/s2geometry index 0146e2d1355..6522a40338d 160000 --- a/contrib/s2geometry +++ b/contrib/s2geometry @@ -1 +1 @@ -Subproject commit 0146e2d1355828f8f633cb050948250ad7406c57 +Subproject commit 6522a40338d58752c2a4227a3fc2bc4107c73e43 diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index 5eabe71b538..48562b8cead 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,7 +1,6 @@ option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES}) -# ARCH_S390X broke upstream, it can be re-enabled once https://github.com/google/s2geometry/pull/372 is merged -if (NOT ENABLE_S2_GEOMETRY OR ARCH_S390X) +if (NOT ENABLE_S2_GEOMETRY) message(STATUS "Not using S2 Geometry") return() endif() diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 018fe57bf56..c59ef1b919a 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/reqgenerator.py b/docker/reqgenerator.py new file mode 100644 index 00000000000..6c1d89ac0ac --- /dev/null +++ b/docker/reqgenerator.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# To run this script you must install docker and piddeptree python package +# + +import subprocess +import os +import sys + + +def build_docker_deps(image_name, imagedir): + cmd = f"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt""" + subprocess.check_call(cmd, shell=True) + + +def check_docker_file_install_with_pip(filepath): + image_name = None + with open(filepath, "r") as f: + for line in f: + if "docker build" in line: + arr = line.split(" ") + if len(arr) > 4: + image_name = arr[4] + if "pip3 install" in line or "pip install" in line: + return image_name, True + return image_name, False + + +def process_affected_images(images_dir): + for root, _dirs, files in os.walk(images_dir): + for f in files: + if f == "Dockerfile": + docker_file_path = os.path.join(root, f) + print("Checking image on path", docker_file_path) + image_name, has_pip = check_docker_file_install_with_pip( + docker_file_path + ) + if has_pip: + print("Find pip in", image_name) + try: + build_docker_deps(image_name, root) + except Exception as ex: + print(ex) + else: + print("Pip not found in", docker_file_path) + + +process_affected_images(sys.argv[1]) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index a86406e5129..240df79aeb1 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 25f3273a648..ac64655991a 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index e0be261d5e8..2512268be0f 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -19,10 +19,7 @@ RUN apt-get update \ odbcinst \ psmisc \ python3 \ - python3-lxml \ python3-pip \ - python3-requests \ - python3-termcolor \ unixodbc \ pv \ jq \ @@ -31,7 +28,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt # This symlink is required by gcc to find the lld linker RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld @@ -39,6 +37,10 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake +# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path. +# It's very dirty workaround, better to build compiler and LLVM ourself and use it. Details: https://github.com/llvm/llvm-project/issues/95792 +RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu + ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ && cd /tmp/ccache \ diff --git a/docker/test/fasttest/requirements.txt b/docker/test/fasttest/requirements.txt new file mode 100644 index 00000000000..993ea22e5ae --- /dev/null +++ b/docker/test/fasttest/requirements.txt @@ -0,0 +1,41 @@ +Jinja2==3.1.3 +MarkupSafe==2.1.5 +PyJWT==2.3.0 +PyYAML==6.0.1 +Pygments==2.11.2 +SecretStorage==3.3.1 +blinker==1.4 +certifi==2020.6.20 +chardet==4.0.0 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +idna==3.3 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.8.0 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pandas==1.5.3 +pip==24.1.1 +pipdeptree==2.23.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.9.0.post0 +pytz==2024.1 +requests==2.32.3 +scipy==1.12.0 +setuptools==59.6.0 +six==1.16.0 +termcolor==1.1.0 +urllib3==1.26.5 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 4d5159cfa9e..0d975d64010 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -84,6 +84,8 @@ function start_server echo "ClickHouse server pid '$server_pid' started and responded" } +export -f start_server + function clone_root { [ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE" @@ -254,6 +256,19 @@ function configure rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" } +function timeout_with_logging() { + local exit_code=0 + + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + + if [[ "${exit_code}" -eq "124" ]] + then + echo "The command 'timeout ${*}' has been killed by timeout" + fi + + return $exit_code +} + function run_tests { clickhouse-server --version @@ -269,6 +284,11 @@ function run_tests NPROC=1 fi + export CLICKHOUSE_CONFIG_DIR=$FASTTEST_DATA + export CLICKHOUSE_CONFIG="$FASTTEST_DATA/config.xml" + export CLICKHOUSE_USER_FILES="$FASTTEST_DATA/user_files" + export CLICKHOUSE_SCHEMA_FILES="$FASTTEST_DATA/format_schemas" + local test_opts=( --hung-check --fast-tests-only @@ -292,6 +312,8 @@ function run_tests clickhouse stop --pid-path "$FASTTEST_DATA" } +export -f run_tests + case "$stage" in "") ls -la @@ -315,7 +337,7 @@ case "$stage" in configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt" ;& "run_tests") - run_tests + timeout_with_logging 35m bash -c run_tests ||: /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \ --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv" diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index d3f78ac1d95..e1fb09b8ed5 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -31,7 +31,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install Jinja2 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt COPY * / diff --git a/docker/test/fuzzer/requirements.txt b/docker/test/fuzzer/requirements.txt new file mode 100644 index 00000000000..3dce93e023b --- /dev/null +++ b/docker/test/fuzzer/requirements.txt @@ -0,0 +1,27 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.4 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 270b40e23a6..469251f648c 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -33,7 +33,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install pycurl +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt && rm -rf /root/.cache/pip # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/integration/base/requirements.txt b/docker/test/integration/base/requirements.txt new file mode 100644 index 00000000000..d195d8deaf6 --- /dev/null +++ b/docker/test/integration/base/requirements.txt @@ -0,0 +1,26 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +pycurl==7.45.3 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/integration/resolver/Dockerfile b/docker/test/integration/resolver/Dockerfile index 01b9b777614..b35a7262651 100644 --- a/docker/test/integration/resolver/Dockerfile +++ b/docker/test/integration/resolver/Dockerfile @@ -2,4 +2,5 @@ # Helper docker container to run python bottle apps FROM python:3 -RUN python -m pip install bottle +COPY requirements.txt / +RUN python -m pip install --no-cache-dir -r requirements.txt diff --git a/docker/test/integration/resolver/requirements.txt b/docker/test/integration/resolver/requirements.txt new file mode 100644 index 00000000000..fbf85295329 --- /dev/null +++ b/docker/test/integration/resolver/requirements.txt @@ -0,0 +1,6 @@ +bottle==0.12.25 +packaging==24.1 +pip==23.2.1 +pipdeptree==2.23.0 +setuptools==69.0.3 +wheel==0.42.0 diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 23d8a37d822..d250b746e7d 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -26,7 +26,6 @@ RUN apt-get update \ libicu-dev \ bsdutils \ curl \ - python3-pika \ liblua5.1-dev \ luajit \ libssl-dev \ @@ -61,49 +60,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ # kazoo 2.10.0 is broken # https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html -RUN python3 -m pip install --no-cache-dir \ - PyMySQL==1.1.0 \ - asyncio==3.4.3 \ - avro==1.10.2 \ - azure-storage-blob==12.19.0 \ - boto3==1.34.24 \ - cassandra-driver==3.29.0 \ - confluent-kafka==2.3.0 \ - delta-spark==2.3.0 \ - dict2xml==1.7.4 \ - dicttoxml==1.7.16 \ - docker==6.1.3 \ - docker-compose==1.29.2 \ - grpcio==1.60.0 \ - grpcio-tools==1.60.0 \ - kafka-python==2.0.2 \ - lz4==4.3.3 \ - minio==7.2.3 \ - nats-py==2.6.0 \ - protobuf==4.25.2 \ - kazoo==2.9.0 \ - psycopg2-binary==2.9.6 \ - pyhdfs==0.3.1 \ - pymongo==3.11.0 \ - pyspark==3.3.2 \ - pytest==7.4.4 \ - pytest-order==1.0.0 \ - pytest-random==0.2 \ - pytest-repeat==0.9.3 \ - pytest-timeout==2.2.0 \ - pytest-xdist==3.5.0 \ - pytest-reportlog==0.4.0 \ - pytz==2023.3.post1 \ - pyyaml==5.3.1 \ - redis==5.0.1 \ - requests-kerberos==0.14.0 \ - tzlocal==2.1 \ - retry==0.9.2 \ - bs4==0.0.2 \ - lxml==5.1.0 \ - urllib3==2.0.7 \ - jwcrypto==1.5.6 -# bs4, lxml are for cloud tests, do not delete +COPY requirements.txt / +RUN python3 -m pip install --no-cache-dir -r requirements.txt # Hudi supports only spark 3.3.*, not 3.4 RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ diff --git a/docker/test/integration/runner/requirements.txt b/docker/test/integration/runner/requirements.txt new file mode 100644 index 00000000000..8a77d8abf77 --- /dev/null +++ b/docker/test/integration/runner/requirements.txt @@ -0,0 +1,113 @@ +PyHDFS==0.3.1 +PyJWT==2.3.0 +PyMySQL==1.1.0 +PyNaCl==1.5.0 +PyYAML==5.3.1 +SecretStorage==3.3.1 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +async-timeout==4.0.3 +asyncio==3.4.3 +attrs==23.2.0 +avro==1.10.2 +azure-core==1.30.1 +azure-storage-blob==12.19.0 +bcrypt==4.1.3 +beautifulsoup4==4.12.3 +blinker==1.4 +boto3==1.34.24 +botocore==1.34.101 +bs4==0.0.2 +cassandra-driver==3.29.0 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +confluent-kafka==2.3.0 +cryptography==3.4.8 +dbus-python==1.2.18 +decorator==5.1.1 +delta-spark==2.3.0 +dict2xml==1.7.4 +dicttoxml==1.7.16 +distro-info==1.1+ubuntu0.2 +distro==1.7.0 +docker-compose==1.29.2 +docker==6.1.3 +dockerpty==0.4.1 +docopt==0.6.2 +exceptiongroup==1.2.1 +execnet==2.1.1 +geomet==0.2.1.post1 +grpcio-tools==1.60.0 +grpcio==1.60.0 +gssapi==1.8.3 +httplib2==0.20.2 +idna==3.7 +importlib-metadata==4.6.4 +iniconfig==2.0.0 +isodate==0.6.1 +jeepney==0.7.1 +jmespath==1.0.1 +jsonschema==3.2.0 +jwcrypto==1.5.6 +kafka-python==2.0.2 +kazoo==2.9.0 +keyring==23.5.0 +krb5==0.5.1 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==5.1.0 +lz4==4.3.3 +minio==7.2.3 +more-itertools==8.10.0 +nats-py==2.6.0 +oauthlib==3.2.0 +packaging==24.0 +paramiko==3.4.0 +pika==1.2.0 +pip==24.1.1 +pipdeptree==2.23.0 +pluggy==1.5.0 +protobuf==4.25.2 +psycopg2-binary==2.9.6 +py4j==0.10.9.5 +py==1.11.0 +pycparser==2.22 +pycryptodome==3.20.0 +pymongo==3.11.0 +pyparsing==2.4.7 +pyrsistent==0.20.0 +pyspark==3.3.2 +pyspnego==0.10.2 +pytest-order==1.0.0 +pytest-random==0.2 +pytest-repeat==0.9.3 +pytest-reportlog==0.4.0 +pytest-timeout==2.2.0 +pytest-xdist==3.5.0 +pytest==7.4.4 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.9.0.post0 +python-dotenv==0.21.1 +pytz==2023.3.post1 +redis==5.0.1 +requests-kerberos==0.14.0 +requests==2.31.0 +retry==0.9.2 +s3transfer==0.10.1 +setuptools==59.6.0 +simplejson==3.19.2 +six==1.16.0 +soupsieve==2.5 +texttable==1.7.0 +tomli==2.0.1 +typing_extensions==4.11.0 +tzlocal==2.1 +unattended-upgrades==0.1 +urllib3==2.0.7 +wadllib==1.3.6 +websocket-client==0.59.0 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile index c9802a0e44e..e6eb2ae336e 100644 --- a/docker/test/libfuzzer/Dockerfile +++ b/docker/test/libfuzzer/Dockerfile @@ -1,3 +1,4 @@ +# docker build -t clickhouse/libfuzzer . ARG FROM_TAG=latest FROM clickhouse/test-base:$FROM_TAG @@ -29,7 +30,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install Jinja2 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt COPY * / diff --git a/docker/test/libfuzzer/requirements.txt b/docker/test/libfuzzer/requirements.txt new file mode 100644 index 00000000000..3dce93e023b --- /dev/null +++ b/docker/test/libfuzzer/requirements.txt @@ -0,0 +1,27 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.4 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 1835900b316..c68a39f6f70 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -23,7 +23,6 @@ RUN apt-get update \ python3 \ python3-dev \ python3-pip \ - python3-setuptools \ rsync \ tree \ tzdata \ @@ -33,12 +32,14 @@ RUN apt-get update \ cargo \ ripgrep \ zstd \ - && pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \ && apt-get purge --yes python3-dev g++ \ && apt-get autoremove --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* +COPY requirements.txt / +RUN pip3 --no-cache-dir install -r requirements.txt + COPY run.sh / CMD ["bash", "/run.sh"] diff --git a/docker/test/performance-comparison/requirements.txt b/docker/test/performance-comparison/requirements.txt new file mode 100644 index 00000000000..932527cc022 --- /dev/null +++ b/docker/test/performance-comparison/requirements.txt @@ -0,0 +1,32 @@ +blinker==1.4 +clickhouse-driver==0.2.7 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +Pygments==2.11.2 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +pytz==2023.4 +PyYAML==6.0.1 +scipy==1.12.0 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +tzlocal==2.1 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 1ea1e52e6fa..1425e12cd84 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -18,11 +18,8 @@ RUN apt-get update --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install \ - numpy \ - pyodbc \ - deepdiff \ - sqlglot +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" diff --git a/docker/test/sqllogic/requirements.txt b/docker/test/sqllogic/requirements.txt new file mode 100644 index 00000000000..abc0a368659 --- /dev/null +++ b/docker/test/sqllogic/requirements.txt @@ -0,0 +1,30 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +deepdiff==7.0.1 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +numpy==1.26.4 +oauthlib==3.2.0 +ordered-set==4.1.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyodbc==5.1.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +sqlglot==23.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile index 7f59f65761f..71d915b0c7a 100644 --- a/docker/test/sqltest/Dockerfile +++ b/docker/test/sqltest/Dockerfile @@ -14,9 +14,8 @@ RUN apt-get update --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install \ - pyyaml \ - clickhouse-driver +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt ARG sqltest_repo="https://github.com/elliotchance/sqltest/" diff --git a/docker/test/sqltest/requirements.txt b/docker/test/sqltest/requirements.txt new file mode 100644 index 00000000000..4a0ae3edbac --- /dev/null +++ b/docker/test/sqltest/requirements.txt @@ -0,0 +1,29 @@ +blinker==1.4 +clickhouse-driver==0.2.7 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +pytz==2024.1 +PyYAML==6.0.1 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +tzlocal==5.2 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 355e70f180e..0daf88cad7e 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -6,7 +6,6 @@ FROM clickhouse/stateless-test:$FROM_TAG RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ - python3-requests \ nodejs \ npm \ && apt-get clean \ diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 2215ac2b37c..80e5e81a4b1 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -16,6 +16,9 @@ dpkg -i package_folder/clickhouse-client_*.deb ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +# shellcheck disable=SC1091 +source /utils.lib + # install test configs /usr/share/clickhouse-test/config/install.sh @@ -272,3 +275,5 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: mv /var/log/clickhouse-server/stderr2.log /test_output/ ||: fi + +collect_core_dumps diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index c3d80a7334b..5a655a3fd2b 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -25,10 +25,7 @@ RUN apt-get update -y \ openssl \ postgresql-client \ python3 \ - python3-lxml \ python3-pip \ - python3-requests \ - python3-termcolor \ qemu-user-static \ sqlite3 \ sudo \ @@ -51,7 +48,8 @@ RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PR && unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \ && rm protoc-${PROTOC_VERSION}-linux-x86_64.zip -RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && cd /tmp/clickhouse-odbc-tmp \ diff --git a/docker/test/stateless/requirements.txt b/docker/test/stateless/requirements.txt new file mode 100644 index 00000000000..3284107e24e --- /dev/null +++ b/docker/test/stateless/requirements.txt @@ -0,0 +1,51 @@ +awscli==1.22.34 +blinker==1.4 +botocore==1.23.34 +certifi==2020.6.20 +chardet==4.0.0 +colorama==0.4.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +docutils==0.17.1 +gyp==0.1 +httplib2==0.20.2 +idna==3.3 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.3 +jmespath==0.10.0 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.8.0 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pandas==1.5.3 +pip==24.1.1 +pipdeptree==2.23.0 +pyarrow==15.0.0 +pyasn1==0.4.8 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.8.1 +pytz==2024.1 +PyYAML==6.0.1 +requests==2.32.3 +roman==3.3 +rsa==4.8 +s3transfer==0.5.0 +scipy==1.12.0 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +termcolor==1.1.0 +urllib3==1.26.5 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 5ece9743498..5f2cb95de75 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -6,19 +6,30 @@ source /setup_export_logs.sh # fail on errors, verbose and export all env variables set -e -x -a +MAX_RUN_TIME=${MAX_RUN_TIME:-10800} +MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME)) + +USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} +USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} + +RUN_SEQUENTIAL_TESTS_IN_PARALLEL=1 + +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 +fi + # Choose random timezone for this test run. # # NOTE: that clickhouse-test will randomize session_timezone by itself as well # (it will choose between default server timezone and something specific). TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" -echo "Choosen random timezone $TZ" +echo "Chosen random timezone $TZ" ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone dpkg -i package_folder/clickhouse-common-static_*.deb dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -# Accept failure in the next two commands until 24.4 is released (for compatibility and Bugfix validation run) -dpkg -i package_folder/clickhouse-odbc-bridge_*.deb || true -dpkg -i package_folder/clickhouse-library-bridge_*.deb || true +dpkg -i package_folder/clickhouse-odbc-bridge_*.deb +dpkg -i package_folder/clickhouse-library-bridge_*.deb dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb @@ -55,12 +66,6 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml - #todo: remove these after 24.3 released. - sudo sed -i "s|azure<|azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml - - #todo: remove these after 24.3 released. - sudo sed -i "s|local<|local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml - function remove_keeper_config() { sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml @@ -93,10 +98,57 @@ if [ "$NUM_TRIES" -gt "1" ]; then mkdir -p /var/run/clickhouse-server fi +# Run a CH instance to execute sequential tests on it in parallel with all other tests. +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + mkdir -p /var/run/clickhouse-server3 /etc/clickhouse-server3 /var/lib/clickhouse3 + cp -r -L /etc/clickhouse-server/* /etc/clickhouse-server3/ + + sudo chown clickhouse:clickhouse /var/run/clickhouse-server3 /var/lib/clickhouse3 /etc/clickhouse-server3/ + sudo chown -R clickhouse:clickhouse /etc/clickhouse-server3/* + + function replace(){ + sudo find /etc/clickhouse-server3/ -type f -name '*.xml' -exec sed -i "$1" {} \; + } + + replace "s|9000|19000|g" + replace "s|9440|19440|g" + replace "s|9988|19988|g" + replace "s|9234|19234|g" + replace "s|9181|19181|g" + replace "s|8443|18443|g" + replace "s|9000|19000|g" + replace "s|9181|19181|g" + replace "s|9440|19440|g" + replace "s|9010|19010|g" + replace "s|9004|19004|g" + replace "s|9005|19005|g" + replace "s|9009|19009|g" + replace "s|8123|18123|g" + replace "s|/var/lib/clickhouse/|/var/lib/clickhouse3/|g" + replace "s|/etc/clickhouse-server/|/etc/clickhouse-server3/|g" + # distributed cache + replace "s|10001|10003|g" + replace "s|10002|10004|g" + + sudo -E -u clickhouse /usr/bin/clickhouse server --daemon --config /etc/clickhouse-server3/config.xml \ + --pid-file /var/run/clickhouse-server3/clickhouse-server.pid \ + -- --path /var/lib/clickhouse3/ --logger.stderr /var/log/clickhouse-server/stderr3.log \ + --logger.log /var/log/clickhouse-server/clickhouse-server3.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server3.err.log \ + --tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \ + --prometheus.port 19988 --keeper_server.raft_configuration.server.port 19234 --keeper_server.tcp_port 19181 \ + --mysql_port 19004 --postgresql_port 19005 + + for _ in {1..100} + do + clickhouse-client --port 19000 --query "SELECT 1" && break + sleep 1 + done +fi + # simplest way to forward env variables to server sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo sed -i "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml sudo sed -i "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_2/|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml @@ -133,7 +185,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) fi -if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then +if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \ | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" \ > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp @@ -213,15 +265,15 @@ function run_tests() ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings') fi - if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--shared-catalog') fi - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--replicated-database') # Too many tests fail for DatabaseReplicated in parallel. ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('2') + ADDITIONAL_OPTIONS+=('3') elif [[ 1 == $(clickhouse-client --query "SELECT value LIKE '%SANITIZE_COVERAGE%' FROM system.build_options WHERE name = 'CXX_FLAGS'") ]]; then # Coverage on a per-test basis could only be collected sequentially. # Do not set the --jobs parameter. @@ -229,7 +281,11 @@ function run_tests() else # All other configurations are OK. ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('8') + ADDITIONAL_OPTIONS+=('5') + fi + + if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--run-sequential-tests-in-parallel') fi if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then @@ -253,7 +309,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -k 60m -s TERM --preserve-status 140m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt @@ -262,14 +318,17 @@ function run_tests() export -f run_tests + +# This should be enough to setup job and collect artifacts +TIMEOUT=$((MAX_RUN_TIME - 600)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. - timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: fi -timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||: +timeout_with_logging "$TIMEOUT" bash -c run_tests ||: echo "Files in current directory" ls -la ./ @@ -290,7 +349,7 @@ do err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes") echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 @@ -299,7 +358,7 @@ do [[ "0" != "${#err}" ]] && failed_to_save_logs=1 fi - if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 @@ -310,12 +369,17 @@ done # Why do we read data with clickhouse-local? # Because it's the simplest way to read it when server has crashed. sudo clickhouse stop ||: -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + sudo clickhouse stop --pid-path /var/run/clickhouse-server3 ||: +fi + +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||: fi -if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then +if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: fi @@ -323,6 +387,12 @@ rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + rg -Fa "" /var/log/clickhouse-server3/clickhouse-server.log ||: + rg -A50 -Fa "============" /var/log/clickhouse-server3/stderr.log ||: + zstd --threads=0 < /var/log/clickhouse-server3/clickhouse-server.log > /test_output/clickhouse-server3.log.zst & +fi + data_path_config="--path=/var/lib/clickhouse/" if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then # We need s3 storage configuration (but it's more likely that clickhouse-local will fail for some reason) @@ -342,12 +412,17 @@ if [ $failed_to_save_logs -ne 0 ]; then for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log do clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + + if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + clickhouse-local --path /var/lib/clickhouse3/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.3.tsv.zst ||: + fi + + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: fi - if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: fi done @@ -383,7 +458,14 @@ rm -rf /var/lib/clickhouse/data/system/*/ tar -chf /test_output/store.tar /var/lib/clickhouse/store ||: tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||: -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + rm -rf /var/lib/clickhouse3/data/system/*/ + tar -chf /test_output/store.tar /var/lib/clickhouse3/store ||: + tar -chf /test_output/metadata.tar /var/lib/clickhouse3/metadata/*.sql ||: +fi + + +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: rg -Fa "" /var/log/clickhouse-server/clickhouse-server2.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: @@ -394,9 +476,11 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||: fi -if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then +if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: fi + +collect_core_dumps diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index c069ccbdd8d..682da1df837 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -1,8 +1,5 @@ #!/bin/bash -# core.COMM.PID-TID -sysctl kernel.core_pattern='core.%e.%p-%P' - OK="\tOK\t\\N\t" FAIL="\tFAIL\t\\N\t" @@ -315,12 +312,4 @@ function collect_query_and_trace_logs() done } -function collect_core_dumps() -{ - find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do - zstd --threads=0 "$core" - mv "$core.zst" /test_output/ - done -} - # vi: ft=bash diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 9b6ab535a90..c3bb8ae9ea4 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -1,5 +1,10 @@ #!/bin/bash +# core.COMM.PID-TID +sysctl kernel.core_pattern='core.%e.%p-%P' +# ASAN doesn't work with suid_dumpable=2 +sysctl fs.suid_dumpable=1 + function run_with_retry() { if [[ $- =~ e ]]; then @@ -38,7 +43,7 @@ function fn_exists() { function timeout_with_logging() { local exit_code=0 - timeout "${@}" || exit_code="${?}" + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" if [[ "${exit_code}" -eq "124" ]] then @@ -48,4 +53,12 @@ function timeout_with_logging() { return $exit_code } +function collect_core_dumps() +{ + find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do + zstd --threads=0 "$core" + mv "$core.zst" /test_output/ + done +} + # vi: ft=bash diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 323944591b1..86467394513 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -21,6 +21,9 @@ source /attach_gdb.lib # shellcheck source=../stateless/stress_tests.lib source /stress_tests.lib +# shellcheck disable=SC1091 +source /utils.lib + install_packages package_folder # Thread Fuzzer allows to check more permutations of possible thread scheduling diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 7cd712b73f6..cdc1d1fa095 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -23,22 +23,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* # python-magic is the same version as in Ubuntu 22.04 -RUN pip3 install \ - PyGithub \ - black==23.12.0 \ - boto3 \ - codespell==2.2.1 \ - mypy==1.8.0 \ - pylint==3.1.0 \ - python-magic==0.4.24 \ - flake8==4.0.1 \ - requests \ - thefuzz \ - tqdm==4.66.4 \ - types-requests \ - unidiff \ - jwt \ - && rm -rf /root/.cache/pip +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 ENV LC_ALL en_US.UTF-8 diff --git a/docker/test/style/requirements.txt b/docker/test/style/requirements.txt new file mode 100644 index 00000000000..bb0cd55dd1a --- /dev/null +++ b/docker/test/style/requirements.txt @@ -0,0 +1,58 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +astroid==3.1.0 +async-timeout==4.0.3 +attrs==23.2.0 +black==23.12.0 +boto3==1.34.131 +botocore==1.34.131 +certifi==2024.6.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +codespell==2.2.1 +cryptography==42.0.8 +Deprecated==1.2.14 +dill==0.3.8 +flake8==4.0.1 +frozenlist==1.4.1 +idna==3.7 +isort==5.13.2 +jmespath==1.0.1 +jwt==1.3.1 +mccabe==0.6.1 +multidict==6.0.5 +mypy==1.8.0 +mypy-extensions==1.0.0 +packaging==24.1 +pathspec==0.9.0 +pip==24.1.1 +pipdeptree==2.23.0 +platformdirs==4.2.2 +pycodestyle==2.8.0 +pycparser==2.22 +pyflakes==2.4.0 +PyGithub==2.3.0 +PyJWT==2.8.0 +pylint==3.1.0 +PyNaCl==1.5.0 +python-dateutil==2.9.0.post0 +python-magic==0.4.24 +PyYAML==6.0.1 +rapidfuzz==3.9.3 +requests==2.32.3 +s3transfer==0.10.1 +setuptools==59.6.0 +six==1.16.0 +thefuzz==0.22.1 +tomli==2.0.1 +tomlkit==0.12.5 +tqdm==4.66.4 +types-requests==2.32.0.20240622 +typing_extensions==4.12.2 +unidiff==0.7.5 +urllib3==2.2.2 +wheel==0.37.1 +wrapt==1.16.0 +yamllint==1.26.3 +yarl==1.9.4 diff --git a/docs/changelogs/v24.6.2.17-stable.md b/docs/changelogs/v24.6.2.17-stable.md new file mode 100644 index 00000000000..820937f6291 --- /dev/null +++ b/docs/changelogs/v24.6.2.17-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.2.17-stable (5710a8b5c0c) FIXME as compared to v24.6.1.4423-stable (dcced7c8478) + +#### New Feature +* Backported in [#66002](https://github.com/ClickHouse/ClickHouse/issues/66002): Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Improvement +* Backported in [#65898](https://github.com/ClickHouse/ClickHouse/issues/65898): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#65935](https://github.com/ClickHouse/ClickHouse/issues/65935): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65907](https://github.com/ClickHouse/ClickHouse/issues/65907): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65962](https://github.com/ClickHouse/ClickHouse/issues/65962): Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Backported in [#66033](https://github.com/ClickHouse/ClickHouse/issues/66033): Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#66076](https://github.com/ClickHouse/ClickHouse/issues/66076): Fix support of non-const scale arguments in rounding functions. [#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Backported in [#66017](https://github.com/ClickHouse/ClickHouse/issues/66017): Fix race in s3queue. [#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 11181703645..06325fa15fb 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -75,7 +75,7 @@ SETTINGS Possible values: - unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper. -- ordered — With ordered mode, only the max name of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. +- ordered — With ordered mode, the files are processed in lexicographic order. It means that if file named 'BBB' was processed at some point and later on a file named 'AA' is added to the bucket, it will be ignored. Only the max name (in lexicographic sense) of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. Default value: `ordered` in versions before 24.6. Starting with 24.6 there is no default value, the setting becomes required to be specified manually. For tables created on earlier versions the default value will remain `Ordered` for compatibility. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a81a17e65d6..b91b794d2d6 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1535,6 +1535,10 @@ the columns from input data will be mapped to the columns from the table by thei Otherwise, the first row will be skipped. If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. +If setting [output_format_binary_encode_types_in_binary_format](/docs/en/operations/settings/settings-formats.md/#output_format_binary_encode_types_in_binary_format) is set to 1, +the types in header will be written using [binary encoding](/docs/en/sql-reference/data-types/data-types-binary-encoding.md) instead of strings with type names in RowBinaryWithNamesAndTypes output format. +If setting [input_format_binary_encode_types_in_binary_format](/docs/en/operations/settings/settings-formats.md/#input_format_binary_encode_types_in_binary_format) is set to 1, +the types in header will be read using [binary encoding](/docs/en/sql-reference/data-types/data-types-binary-encoding.md) instead of strings with type names in RowBinaryWithNamesAndTypes input format. ::: ## RowBinaryWithDefaults {#rowbinarywithdefaults} diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 46c24ad8491..fc861e25e9f 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` + - `use_same_s3_credentials_for_base_backup`: whether base backup to S3 should inherit credentials from the query. Only works with `S3`. - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. - `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD` diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 70f64d08ba3..fe60ceedc0b 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -2,15 +2,11 @@ slug: /en/operations/opentelemetry sidebar_position: 62 sidebar_label: Tracing ClickHouse with OpenTelemetry -title: "[experimental] Tracing ClickHouse with OpenTelemetry" +title: "Tracing ClickHouse with OpenTelemetry" --- [OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting traces and metrics from the distributed application. ClickHouse has some support for OpenTelemetry. -:::note -This is an experimental feature that will change in backwards-incompatible ways in future releases. -::: - ## Supplying Trace Context to ClickHouse ClickHouse accepts trace context HTTP headers, as described by the [W3C recommendation](https://www.w3.org/TR/trace-context/). It also accepts trace context over a native protocol that is used for communication between ClickHouse servers or between the client and server. For manual testing, trace context headers conforming to the Trace Context recommendation can be supplied to `clickhouse-client` using `--opentelemetry-traceparent` and `--opentelemetry-tracestate` flags. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 9879ee35612..22c8c704ba2 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -974,6 +974,13 @@ Default value: false - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting +## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization} + +Enables compact mode for binary serialization of discriminators in Variant data type. +This mode allows to use significantly less memory for storing discriminators in parts when there is mostly one variant or a lot of NULL values. + +Default value: true + ## merge_workload Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 530023df5b7..f8b40cd81ac 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1951,6 +1951,18 @@ The maximum allowed size for String in RowBinary format. It prevents allocating Default value: `1GiB`. +### output_format_binary_encode_types_in_binary_format {#output_format_binary_encode_types_in_binary_format} + +Write data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in RowBinaryWithNamesAndTypes output format. + +Disabled by default. + +### input_format_binary_decode_types_in_binary_format {#input_format_binary_decode_types_in_binary_format} + +Read data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in RowBinaryWithNamesAndTypes input format. + +Disabled by default. + ## Native format settings {#native-format-settings} ### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion} @@ -1958,3 +1970,15 @@ Default value: `1GiB`. Allow types conversion in Native input format between columns from input data and requested columns. Enabled by default. + +### output_format_native_encode_types_in_binary_format {#output_format_native_encode_types_in_binary_format} + +Write data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in Native output format. + +Disabled by default. + +### input_format_native_decode_types_in_binary_format {#input_format_native_decode_types_in_binary_format} + +Read data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in Native input format. + +Disabled by default. \ No newline at end of file diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1d74a63b972..c3f697c3bdc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1170,6 +1170,10 @@ Data in the VALUES clause of INSERT queries is processed by a separate stream pa Default value: 262144 (= 256 KiB). +:::note +`max_query_size` cannot be set within an SQL query (e.g., `SELECT now() SETTINGS max_query_size=10000`) because ClickHouse needs to allocate a buffer to parse the query, and this buffer size is determined by the `max_query_size` setting, which must be configured before the query is executed. +::: + ## max_parser_depth {#max_parser_depth} Limits maximum recursion depth in the recursive descent parser. Allows controlling the stack size. @@ -1354,12 +1358,25 @@ Connection pool size for PostgreSQL table engine and database engine. Default value: 16 +## postgresql_connection_attempt_timeout {#postgresql-connection-attempt-timeout} + +Connection timeout in seconds of a single attempt to connect PostgreSQL end-point. +The value is passed as a `connect_timeout` parameter of the connection URL. + +Default value: `2`. + ## postgresql_connection_pool_wait_timeout {#postgresql-connection-pool-wait-timeout} Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool. Default value: 5000 +## postgresql_connection_pool_retries {#postgresql-connection-pool-retries} + +The maximum number of retries to establish a connection with the PostgreSQL end-point. + +Default value: `2`. + ## postgresql_connection_pool_auto_close_connection {#postgresql-connection-pool-auto-close-connection} Close connection before returning connection to the pool. diff --git a/docs/en/operations/startup-scripts.md b/docs/en/operations/startup-scripts.md new file mode 100644 index 00000000000..91aa4772bcf --- /dev/null +++ b/docs/en/operations/startup-scripts.md @@ -0,0 +1,30 @@ +--- +slug: /en/operations/startup-scripts +sidebar_label: Startup Scripts +--- + +# Startup Scripts + +ClickHouse can run arbitrary SQL queries from the server configuration during startup. This can be useful for migrations or automatic schema creation. + +```xml + + + + CREATE ROLE OR REPLACE test_role + + + CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog + SELECT 1; + + + +``` + +ClickHouse executes all queries from the `startup_scripts` sequentially in the specified order. If any of the queries fail, the execution of the following queries won't be interrupted. + +You can specify a conditional query in the config. In that case, the corresponding query executes only when the condition query returns the value `1` or `true`. + +:::note +If the condition query returns any other value than `1` or `true`, the result will be interpreted as `false`, and the corresponding won't be executed. +::: diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 83ce817b7db..f253b164e2a 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -357,7 +357,7 @@ Number of currently running inserts to Kafka Number of alive connections -### KeeperOutstandingRequets +### KeeperOutstandingRequests Number of outstanding requests diff --git a/docs/en/operations/utilities/clickhouse-disks.md b/docs/en/operations/utilities/clickhouse-disks.md index 76db9e41836..e22bc06b641 100644 --- a/docs/en/operations/utilities/clickhouse-disks.md +++ b/docs/en/operations/utilities/clickhouse-disks.md @@ -4,35 +4,56 @@ sidebar_position: 59 sidebar_label: clickhouse-disks --- -# clickhouse-disks +# Clickhouse-disks -A utility providing filesystem-like operations for ClickHouse disks. +A utility providing filesystem-like operations for ClickHouse disks. It can work in both interactive and not interactive modes. -Program-wide options: +## Program-wide options * `--config-file, -C` -- path to ClickHouse config, defaults to `/etc/clickhouse-server/config.xml`. * `--save-logs` -- Log progress of invoked commands to `/var/log/clickhouse-server/clickhouse-disks.log`. * `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`. * `--disk` -- what disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`. +* `--query, -q` -- single query that can be executed without launching interactive mode +* `--help, -h` -- print all the options and commands with description + +## Default Disks +After the launch two disks are initialized. The first one is a disk `local` that is supposed to imitate local file system from which clickhouse-disks utility was launched. The second one is a disk `default` that is mounted to the local filesystem in the directory that can be found in config as a parameter `clickhouse/path` (default value is `/var/lib/clickhouse`). + +## Clickhouse-disks state +For each disk that was added the utility stores current directory (as in a usual filesystem). User can change current directory and switch between disks. + +State is reflected in a prompt "`disk_name`:`path_name`" ## Commands -* `copy [--disk-from d1] [--disk-to d2] `. - Recursively copy data from `FROM_PATH` at disk `d1` (defaults to `disk` value if not provided) - to `TO_PATH` at disk `d2` (defaults to `disk` value if not provided). -* `move `. - Move file or directory from `FROM_PATH` to `TO_PATH`. -* `remove `. - Remove `PATH` recursively. -* `link `. - Create a hardlink from `FROM_PATH` to `TO_PATH`. -* `list [--recursive] ...` - List files at `PATH`s. Non-recursive by default. -* `list-disks`. +In these documentation file all mandatory positional arguments are referred as ``, named arguments are referred as `[--parameter value]`. All positional parameters could be mentioned as a named parameter with a corresponding name. + +* `cd (change-dir, change_dir) [--disk disk] ` + Change directory to path `path` on disk `disk` (default value is a current disk). No disk switching happens. +* `copy (cp) [--disk-from disk_1] [--disk-to disk_2] `. + Recursively copy data from `path-from` at disk `disk_1` (default value is a current disk (parameter `disk` in a non-interactive mode)) + to `path-to` at disk `disk_2` (default value is a current disk (parameter `disk` in a non-interactive mode)). +* `current_disk_with_path (current, current_disk, current_path)` + Print current state in format: + `Disk: "current_disk" Path: "current path on current disk"` +* `help []` + Print help message about command `command`. If `command` is not specified print information about all commands. +* `move (mv) `. + Move file or directory from `path-from` to `path-to` within current disk. +* `remove (rm, delete) `. + Remove `path` recursively on a current disk. +* `link (ln) `. + Create a hardlink from `path-from` to `path-to` on a current disk. +* `list (ls) [--recursive] ` + List files at `path`s on a current disk. Non-recursive by default. +* `list-disks (list_disks, ls-disks, ls_disks)`. List disks names. -* `mkdir [--recursive] `. +* `mkdir [--recursive] ` on a current disk. Create a directory. Non-recursive by default. -* `read: []` - Read a file from `FROM_PATH` to `TO_PATH` (`stdout` if not supplied). -* `write [FROM_PATH] `. - Write a file from `FROM_PATH` (`stdin` if not supplied) to `TO_PATH`. +* `read (r) [--path-to path]` + Read a file from `path-from` to `path` (`stdout` if not supplied). +* `switch-disk [--path path] ` + Switch to disk `disk` on path `path` (if `path` is not specified default value is a previous path on disk `disk`). +* `write (w) [--path-from path] `. + Write a file from `path` (`stdin` if `path` is not supplied, input must finish by Ctrl+D) to `path-to`. diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index f19643a3fa5..c20e4fc3b09 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -16,7 +16,7 @@ sidebar_label: clickhouse-local While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high-performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`. -Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3). +Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local file](#query_data_in_file) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3). ## Download clickhouse-local diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 96bf0c5d93b..5056ef2c7aa 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -18,7 +18,7 @@ ClickHouse also supports: During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL. -There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`. +There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases (`any` and `anyLast` respectively) when followed by the modifier `RESPECT NULLS`. For example, `FIRST_VALUE(b) RESPECT NULLS`. **Examples:** diff --git a/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md b/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md new file mode 100644 index 00000000000..fdbfd5b9e41 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md @@ -0,0 +1,37 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/aggthrow +sidebar_position: 101 +--- + +# aggThrow + +This function can be used for the purpose of testing exception safety. It will throw an exception on creation with the specified probability. + +**Syntax** + +```sql +aggThrow(throw_prob) +``` + +**Arguments** + +- `throw_prob` — Probability to throw on creation. [Float64](../../data-types/float.md). + +**Returned value** + +- An exception: `Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully`. + +**Example** + +Query: + +```sql +SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even; +``` + +Result: + +```response +Received exception: +Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully: While executing AggregatingTransform. (AGGREGATE_FUNCTION_THROW) +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index cdff7dde4a9..972263585f2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -5,12 +5,12 @@ sidebar_position: 102 # any -Selects the first encountered value of a column. +Selects the first encountered value of a column, ignoring any `NULL` values. **Syntax** ```sql -any(column) +any(column) [RESPECT NULLS] ``` Aliases: `any_value`, [`first_value`](../reference/first_value.md). @@ -20,7 +20,9 @@ Aliases: `any_value`, [`first_value`](../reference/first_value.md). **Returned value** -By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md) it supports `RESPECT NULLS`, in which case it will select the first value passed, independently on whether it's NULL or not. +:::note +Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not. +::: :::note The return type of the function is the same as the input, except for LowCardinality which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour. diff --git a/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md deleted file mode 100644 index 99104a9b8c7..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/any_respect_nulls -sidebar_position: 103 ---- - -# any_respect_nulls - -Selects the first encountered value of a column, irregardless of whether it is a `NULL` value or not. - -Alias: `any_value_respect_nulls`, `first_value_repect_nulls`. - -**Syntax** - -```sql -any_respect_nulls(column) -``` - -**Parameters** -- `column`: The column name. - -**Returned value** - -- The last value encountered, irregardless of whether it is a `NULL` value or not. - -**Example** - -Query: - -```sql -CREATE TABLE any_nulls (city Nullable(String)) ENGINE=Log; - -INSERT INTO any_nulls (city) VALUES (NULL), ('Amsterdam'), ('New York'), ('Tokyo'), ('Valencia'), (NULL); - -SELECT any(city), any_respect_nulls(city) FROM any_nulls; -``` - -```response -┌─any(city)─┬─any_respect_nulls(city)─┐ -│ Amsterdam │ ᴺᵁᴸᴸ │ -└───────────┴─────────────────────────┘ -``` - -**See Also** -- [any](../reference/any.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast.md b/docs/en/sql-reference/aggregate-functions/reference/anylast.md index e43bc07fbdc..202d2e9fb10 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast.md @@ -5,17 +5,21 @@ sidebar_position: 105 # anyLast -Selects the last value encountered. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function. +Selects the last value encountered, ignoring any `NULL` values by default. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function. **Syntax** ```sql -anyLast(column) +anyLast(column) [RESPECT NULLS] ``` **Parameters** - `column`: The column name. +:::note +Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not. +::: + **Returned value** - The last value encountered. diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md deleted file mode 100644 index 8f093cfdb61..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/anylast_respect_nulls -sidebar_position: 106 ---- - -# anyLast_respect_nulls - -Selects the last value encountered, irregardless of whether it is `NULL` or not. - -**Syntax** - -```sql -anyLast_respect_nulls(column) -``` - -**Parameters** -- `column`: The column name. - -**Returned value** - -- The last value encountered, irregardless of whether it is `NULL` or not. - -**Example** - -Query: - -```sql -CREATE TABLE any_last_nulls (city Nullable(String)) ENGINE=Log; - -INSERT INTO any_last_nulls (city) VALUES ('Amsterdam'),(NULL),('New York'),('Tokyo'),('Valencia'),(NULL); - -SELECT anyLast(city), anyLast_respect_nulls(city) FROM any_last_nulls; -``` - -```response -┌─anyLast(city)─┬─anyLast_respect_nulls(city)─┐ -│ Valencia │ ᴺᵁᴸᴸ │ -└───────────────┴─────────────────────────────┘ -``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index e3725b6a430..2dce0afe2e1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -43,11 +43,11 @@ Standard aggregate functions: ClickHouse-specific aggregate functions: +- [aggThrow](../reference/aggthrow.md) - [analysisOfVariance](../reference/analysis_of_variance.md) -- [any](../reference/any_respect_nulls.md) +- [any](../reference/any.md) - [anyHeavy](../reference/anyheavy.md) - [anyLast](../reference/anylast.md) -- [anyLast](../reference/anylast_respect_nulls.md) - [boundingRatio](../reference/boundrat.md) - [first_value](../reference/first_value.md) - [last_value](../reference/last_value.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md index c9c6913249c..73075c0823d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md @@ -5,23 +5,45 @@ sidebar_position: 165 # maxMap -Syntax: `maxMap(key, value)` or `maxMap(Tuple(key, value))` - Calculates the maximum from `value` array according to the keys specified in the `key` array. -Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values. +**Syntax** -The number of elements in `key` and `value` must be the same for each row that is totaled. +```sql +maxMap(key, value) +``` +or +```sql +maxMap(Tuple(key, value)) +``` -Returns a tuple of two arrays: keys and values calculated for the corresponding keys. +Alias: `maxMappedArrays` -Example: +:::note +- Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values. +- The number of elements in `key` and `value` must be the same for each row that is totaled. +::: + +**Parameters** + +- `key` — Array of keys. [Array](../../data-types/array.md). +- `value` — Array of values. [Array](../../data-types/array.md). + +**Returned value** + +- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)). + +**Example** + +Query: ``` sql SELECT maxMap(a, b) FROM values('a Array(Char), b Array(Int64)', (['x', 'y'], [2, 2]), (['y', 'z'], [3, 1])) ``` +Result: + ``` text ┌─maxMap(a, b)───────────┐ │ [['x','y','z'],[2,3,1]]│ diff --git a/docs/en/sql-reference/aggregate-functions/reference/minmap.md b/docs/en/sql-reference/aggregate-functions/reference/minmap.md index b1fbb9e49f3..c0f340b3f3f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/minmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/minmap.md @@ -5,23 +5,45 @@ sidebar_position: 169 # minMap -Syntax: `minMap(key, value)` or `minMap(Tuple(key, value))` - Calculates the minimum from `value` array according to the keys specified in the `key` array. -Passing a tuple of keys and value ​​arrays is identical to passing two arrays of keys and values. +**Syntax** -The number of elements in `key` and `value` must be the same for each row that is totaled. +```sql +`minMap(key, value)` +``` +or +```sql +minMap(Tuple(key, value)) +``` -Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. +Alias: `minMappedArrays` -Example: +:::note +- Passing a tuple of keys and value arrays is identical to passing an array of keys and an array of values. +- The number of elements in `key` and `value` must be the same for each row that is totaled. +::: + +**Parameters** + +- `key` — Array of keys. [Array](../../data-types/array.md). +- `value` — Array of values. [Array](../../data-types/array.md). + +**Returned value** + +- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)). + +**Example** + +Query: ``` sql SELECT minMap(a, b) FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1])) ``` +Result: + ``` text ┌─minMap(a, b)──────┐ │ ([1,2,3],[2,1,1]) │ diff --git a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md index 21344b58ba6..19154c488d9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md +++ b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md @@ -16,7 +16,7 @@ singleValueOrNull(x) **Parameters** -- `x` — Column of any [data type](../../data-types/index.md). +- `x` — Column of any [data type](../../data-types/index.md) (except [Map](../../data-types/map.md), [Array](../../data-types/array.md) or [Tuple](../../data-types/tuple) which cannot be of type [Nullable](../../data-types/nullable.md)). **Returned values** diff --git a/docs/en/sql-reference/data-types/data-types-binary-encoding.md b/docs/en/sql-reference/data-types/data-types-binary-encoding.md new file mode 100644 index 00000000000..812e946e43e --- /dev/null +++ b/docs/en/sql-reference/data-types/data-types-binary-encoding.md @@ -0,0 +1,115 @@ +--- +slug: /en/sql-reference/data-types/data-types-binary-encoding +sidebar_position: 56 +sidebar_label: Data types binary encoding specification. +--- + + +# Data types binary encoding specification + +This specification describes the binary format that can be used for binary encoding and decoding of ClickHouse data types. This format is used in `Dynamic` column [binary serialization](dynamic.md#binary-output-format) and can be used in input/output formats [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes) and [Native](../../interfaces/formats.md#native) under corresponding settings. + +The table below describes how each data type is represented in binary format. Each data type encoding consist of 1 byte that indicates the type and some optional additional information. +`var_uint` in the binary encoding means that the size is encoded using Variable-Length Quantity compression. + +| ClickHouse data type | Binary encoding | +|--------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Nothing` | `0x00` | +| `UInt8` | `0x01` | +| `UInt16` | `0x02` | +| `UInt32` | `0x03` | +| `UInt64` | `0x04` | +| `UInt128` | `0x05` | +| `UInt256` | `0x06` | +| `Int8` | `0x07` | +| `Int16` | `0x08` | +| `Int32` | `0x09` | +| `Int64` | `0x0A` | +| `Int128` | `0x0B` | +| `Int256` | `0x0C` | +| `Float32` | `0x0D` | +| `Float64` | `0x0E` | +| `Date` | `0x0F` | +| `Date32` | `0x10` | +| `DateTime` | `0x11` | +| `DateTime(time_zone)` | `0x12` | +| `DateTime64(P)` | `0x13` | +| `DateTime64(P, time_zone)` | `0x14` | +| `String` | `0x15` | +| `FixedString(N)` | `0x16` | +| `Enum8` | `0x17...` | +| `Enum16` | `0x18...>` | +| `Decimal32(P, S)` | `0x19` | +| `Decimal64(P, S)` | `0x1A` | +| `Decimal128(P, S)` | `0x1B` | +| `Decimal256(P, S)` | `0x1C` | +| `UUID` | `0x1D` | +| `Array(T)` | `0x1E` | +| `Tuple(T1, ..., TN)` | `0x1F...` | +| `Tuple(name1 T1, ..., nameN TN)` | `0x20...` | +| `Set` | `0x21` | +| `Interval` | `0x22` (see [interval kind binary encoding](#interval-kind-binary-encoding)) | +| `Nullable(T)` | `0x23` | +| `Function` | `0x24...` | +| `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x25......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `LowCardinality(T)` | `0x26` | +| `Map(K, V)` | `0x27` | +| `IPv4` | `0x28` | +| `IPv6` | `0x29` | +| `Variant(T1, ..., TN)` | `0x2A...` | +| `Dynamic(max_types=N)` | `0x2B` | +| `Custom type` (`Ring`, `Polygon`, etc) | `0x2C` | +| `Bool` | `0x2D` | +| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2E......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `Nested(name1 T1, ..., nameN TN)` | `0x2F...` | + + +### Interval kind binary encoding + +The table below describes how different interval kinds of `Interval` data type are encoded. + +| Interval kind | Binary encoding | +|---------------|-----------------| +| `Nanosecond` | `0x00` | +| `Microsecond` | `0x01` | +| `Millisecond` | `0x02` | +| `Second` | `0x03` | +| `Minute` | `0x04` | +| `Hour` | `0x05` | +| `Day` | `0x06` | +| `Week` | `0x07` | +| `Month` | `0x08` | +| `Quarter` | `0x09` | +| `Year` | `0x1A` | + +### Aggregate function parameter binary encoding + +The table below describes how parameters of `AggragateFunction` and `SimpleAggregateFunction` are encoded. +The encoding of a parameter consists of 1 byte indicating the type of the parameter and the value itself. + +| Parameter type | Binary encoding | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| `Null` | `0x00` | +| `UInt64` | `0x01` | +| `Int64` | `0x02` | +| `UInt128` | `0x03` | +| `Int128` | `0x04` | +| `UInt128` | `0x05` | +| `Int128` | `0x06` | +| `Float64` | `0x07` | +| `Decimal32` | `0x08` | +| `Decimal64` | `0x09` | +| `Decimal128` | `0x0A` | +| `Decimal256` | `0x0B` | +| `String` | `0x0C` | +| `Array` | `0x0D...` | +| `Tuple` | `0x0E...` | +| `Map` | `0x0F...` | +| `IPv4` | `0x10` | +| `IPv6` | `0x11` | +| `UUID` | `0x12` | +| `Bool` | `0x13` | +| `Object` | `0x14...` | +| `AggregateFunctionState` | `0x15` | +| `Negative infinity` | `0xFE` | +| `Positive infinity` | `0xFF` | diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index 955fd54e641..8be81471377 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/dynamic -sidebar_position: 56 +sidebar_position: 62 sidebar_label: Dynamic --- @@ -493,3 +493,44 @@ SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) O ``` As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`. + +## JSONExtract functions with Dynamic + +All `JSONExtract*` functions support `Dynamic` type: + +```sql +SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType(dynamic) AS dynamic_type; +``` + +```text +┌─dynamic─┬─dynamic_type───────────┐ +│ [1,2,3] │ Array(Nullable(Int64)) │ +└─────────┴────────────────────────┘ +``` + +```sql +SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types``` + +```text +┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐ +│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │ +└──────────────────────────────────┴─────────────────────────────────────────────────┘ +``` + +```sql +SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types``` +``` + +```text +┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐ +│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │ +└────────────────────────────────────────┴───────────────────────────────────────────────────────┘ +``` + +### Binary output format + +In RowBinary format values of `Dynamic` type are serialized in the following format: + +```text + +``` diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index c29be2cff58..f2eac12594d 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -5,11 +5,11 @@ sidebar_label: Object Data Type keywords: [object, data type] --- -# Object Data Type +# Object Data Type (deprecated) -:::note -This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864) -::: +**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864). + +
Stores JavaScript Object Notation (JSON) documents in a single column. diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index d87ca4a0fe7..1b52440903d 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -3080,4 +3080,4 @@ Result: ## Distance functions -All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). +All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). \ No newline at end of file diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index b532e0de8f0..46b1167fa33 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -83,7 +83,57 @@ Result: ``` ## makeDate32 -Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md). +Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from a year, month, day (or optionally a year and a day). + +**Syntax** + +```sql +makeDate32(year, [month,] day) +``` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month (optional). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +:::note +If `month` is omitted then `day` should take a value between `1` and `365`, otherwise it should take a value between `1` and `31`. +::: + +**Returned values** + +- A date created from the arguments. [Date32](../../sql-reference/data-types/date32.md). + +**Examples** + +Create a date from a year, month, and day: + +Query: + +```sql +SELECT makeDate32(2024, 1, 1); +``` + +Result: + +```response +2024-01-01 +``` + +Create a Date from a year and day of year: + +Query: + +``` sql +SELECT makeDate32(2024, 100); +``` + +Result: + +```response +2024-04-09 +``` ## makeDateTime @@ -125,12 +175,38 @@ Result: ## makeDateTime64 -Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md). +Creates a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components: year, month, day, hour, minute, second. With optional sub-second precision. **Syntax** +```sql +makeDateTime64(year, month, day, hour, minute, second[, precision]) +``` + +**Arguments** + +- `year` — Year (0-9999). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month (1-12). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day (1-31). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `hour` — Hour (0-23). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `minute` — Minute (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `second` — Second (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `precision` — Optional precision of the sub-second component (0-9). [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + ``` sql -makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]) +SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5); +``` + +```response +┌─makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5)─┐ +│ 2023-05-15 10:30:45.00779 │ +└─────────────────────────────────────────────────┘ ``` ## timestamp diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index e431ed75465..7c977e7d6dc 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -314,10 +314,71 @@ SELECT groupBitXor(cityHash64(*)) FROM table Calculates a 32-bit hash code from any type of integer. This is a relatively fast non-cryptographic hash function of average quality for numbers. +**Syntax** + +```sql +intHash32(int) +``` + +**Arguments** + +- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md). + +**Returned value** + +- 32-bit hash code. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT intHash32(42); +``` + +Result: + +```response +┌─intHash32(42)─┐ +│ 1228623923 │ +└───────────────┘ +``` + ## intHash64 Calculates a 64-bit hash code from any type of integer. -It works faster than intHash32. Average quality. +This is a relatively fast non-cryptographic hash function of average quality for numbers. +It works faster than [intHash32](#inthash32). + +**Syntax** + +```sql +intHash64(int) +``` + +**Arguments** + +- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md). + +**Returned value** + +- 64-bit hash code. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT intHash64(42); +``` + +Result: + +```response +┌────────intHash64(42)─┐ +│ 11490350930367293593 │ +└──────────────────────┘ +``` ## SHA1, SHA224, SHA256, SHA512, SHA512_256 diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 4e252785715..8f9fd9abb0d 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server. fqdn(); ``` -This function is case-insensitive. +Aliases: `fullHostName`, 'FQDN'. **Returned value** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 894b9026165..e2f1b8c7f14 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -12,9 +12,7 @@ Functions for [searching](string-search-functions.md) in strings and for [replac ## empty -Checks whether the input string is empty. - -A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. +Checks whether the input string is empty. A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. The function is also available for [arrays](array-functions.md#function-empty) and [UUIDs](uuid-functions.md#empty). @@ -48,9 +46,7 @@ Result: ## notEmpty -Checks whether the input string is non-empty. - -A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. +Checks whether the input string is non-empty. A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. The function is also available for [arrays](array-functions.md#function-notempty) and [UUIDs](uuid-functions.md#notempty). @@ -96,7 +92,7 @@ length(s) **Parameters** -- `s`: An input string or array. [String](../data-types/string)/[Array](../data-types/array). +- `s` — An input string or array. [String](../data-types/string)/[Array](../data-types/array). **Returned value** @@ -149,7 +145,7 @@ lengthUTF8(s) **Parameters** -- `s`: String containing valid UTF-8 encoded text. [String](../data-types/string). +- `s` — String containing valid UTF-8 encoded text. [String](../data-types/string). **Returned value** @@ -183,8 +179,8 @@ left(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -230,8 +226,8 @@ leftUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -347,8 +343,8 @@ right(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -394,8 +390,8 @@ rightUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -547,7 +543,7 @@ Alias: `ucase` **Parameters** -- `input`: A string type [String](../data-types/string.md). +- `input` — A string type [String](../data-types/string.md). **Returned value** @@ -571,16 +567,47 @@ SELECT upper('clickhouse'); Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). +:::note +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point (such as `ẞ` and `ß`), the result may be incorrect for this code point. +::: -If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. +**Syntax** + +```sql +lowerUTF8(input) +``` + +**Parameters** + +- `input` — A string type [String](../data-types/string.md). + +**Returned value** + +- A [String](../data-types/string.md) data type value. + +**Example** + +Query: + +``` sql +SELECT lowerUTF8('MÜNCHEN') as Lowerutf8; +``` + +Result: + +``` response +┌─Lowerutf8─┐ +│ münchen │ +└───────────┘ +``` ## upperUTF8 Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. - -If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. +:::note +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point (such as `ẞ` and `ß`), the result may be incorrect for this code point. +::: **Syntax** @@ -590,7 +617,7 @@ upperUTF8(input) **Parameters** -- `input`: A string type [String](../data-types/string.md). +- `input` — A string type [String](../data-types/string.md). **Returned value** @@ -604,6 +631,8 @@ Query: SELECT upperUTF8('München') as Upperutf8; ``` +Result: + ``` response ┌─Upperutf8─┐ │ MÜNCHEN │ @@ -614,6 +643,34 @@ SELECT upperUTF8('München') as Upperutf8; Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0. +**Syntax** + +``` sql +isValidUTF8(input) +``` + +**Parameters** + +- `input` — A string type [String](../data-types/string.md). + +**Returned value** + +- Returns `1`, if the set of bytes constitutes valid UTF-8-encoded text, otherwise `0`. + +Query: + +``` sql +SELECT isValidUTF8('\xc3\xb1') AS valid, isValidUTF8('\xc3\x28') AS invalid; +``` + +Result: + +``` response +┌─valid─┬─invalid─┐ +│ 1 │ 0 │ +└───────┴─────────┘ +``` + ## toValidUTF8 Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running in a row invalid characters are collapsed into the one replacement character. @@ -883,7 +940,7 @@ Returns the substring of a string `s` which starts at the specified byte index ` substring(s, offset[, length]) ``` -Alias: +Aliases: - `substr` - `mid` - `byteSlice` @@ -926,9 +983,9 @@ substringUTF8(s, offset[, length]) **Arguments** -- `s`: The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) -- `offset`: The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). -- `length`: The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. +- `s` — The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset` — The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length` — The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** @@ -964,9 +1021,9 @@ Alias: `SUBSTRING_INDEX` **Arguments** -- s: The string to extract substring from. [String](../data-types/string.md). -- delim: The character to split. [String](../data-types/string.md). -- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) +- s — The string to extract substring from. [String](../data-types/string.md). +- delim — The character to split. [String](../data-types/string.md). +- count — The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Example** @@ -995,9 +1052,9 @@ substringIndexUTF8(s, delim, count) **Arguments** -- `s`: The string to extract substring from. [String](../data-types/string.md). -- `delim`: The character to split. [String](../data-types/string.md). -- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) +- `s` — The string to extract substring from. [String](../data-types/string.md). +- `delim` — The character to split. [String](../data-types/string.md). +- `count` — The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Returned value** @@ -1277,7 +1334,7 @@ tryBase64Decode(encoded) **Arguments** -- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string. +- `encoded` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string. **Returned value** @@ -1309,7 +1366,7 @@ tryBase64URLDecode(encodedUrl) **Parameters** -- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. +- `encodedURL` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. **Returned value** @@ -1555,7 +1612,7 @@ The result type is UInt64. ## normalizeQuery -Replaces literals, sequences of literals and complex aliases with placeholders. +Replaces literals, sequences of literals and complex aliases (containing whitespace, more than two digits or at least 36 bytes long such as UUIDs) with placeholder `?`. **Syntax** @@ -1573,6 +1630,8 @@ normalizeQuery(x) **Example** +Query: + ``` sql SELECT normalizeQuery('[1, 2, 3, x]') AS query; ``` @@ -1585,9 +1644,44 @@ Result: └──────────┘ ``` +## normalizeQueryKeepNames + +Replaces literals, sequences of literals with placeholder `?` but does not replace complex aliases (containing whitespace, more than two digits +or at least 36 bytes long such as UUIDs). This helps better analyze complex query logs. + +**Syntax** + +``` sql +normalizeQueryKeepNames(x) +``` + +**Arguments** + +- `x` — Sequence of characters. [String](../data-types/string.md). + +**Returned value** + +- Sequence of characters with placeholders. [String](../data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT normalizeQuery('SELECT 1 AS aComplexName123'), normalizeQueryKeepNames('SELECT 1 AS aComplexName123'); +``` + +Result: + +```result +┌─normalizeQuery('SELECT 1 AS aComplexName123')─┬─normalizeQueryKeepNames('SELECT 1 AS aComplexName123')─┐ +│ SELECT ? AS `?` │ SELECT ? AS aComplexName123 │ +└───────────────────────────────────────────────┴────────────────────────────────────────────────────────┘ +``` + ## normalizedQueryHash -Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query log. +Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query logs. **Syntax** @@ -1605,6 +1699,8 @@ normalizedQueryHash(x) **Example** +Query: + ``` sql SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res; ``` @@ -1617,6 +1713,43 @@ Result: └─────┘ ``` +## normalizedQueryHashKeepNames + +Like [normalizedQueryHash](#normalizedqueryhash) it returns identical 64bit hash values without the values of literals for similar queries but it does not replace complex aliases (containing whitespace, more than two digits +or at least 36 bytes long such as UUIDs) with a placeholder before hashing. Can be helpful to analyze query logs. + +**Syntax** + +``` sql +normalizedQueryHashKeepNames(x) +``` + +**Arguments** + +- `x` — Sequence of characters. [String](../data-types/string.md). + +**Returned value** + +- Hash value. [UInt64](../data-types/int-uint.md#uint-ranges). + +**Example** + +``` sql +SELECT normalizedQueryHash('SELECT 1 AS `xyz123`') != normalizedQueryHash('SELECT 1 AS `abc123`') AS normalizedQueryHash; +SELECT normalizedQueryHashKeepNames('SELECT 1 AS `xyz123`') != normalizedQueryHashKeepNames('SELECT 1 AS `abc123`') AS normalizedQueryHashKeepNames; +``` + +Result: + +```result +┌─normalizedQueryHash─┐ +│ 0 │ +└─────────────────────┘ +┌─normalizedQueryHashKeepNames─┐ +│ 1 │ +└──────────────────────────────┘ +``` + ## normalizeUTF8NFC Converts a string to [NFC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. @@ -1935,7 +2068,7 @@ soundex(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -1968,7 +2101,7 @@ punycodeEncode(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -2001,7 +2134,7 @@ punycodeEncode(val) **Arguments** -- `val` - Punycode-encoded string. [String](../data-types/string.md) +- `val` — Punycode-encoded string. [String](../data-types/string.md) **Returned value** @@ -2027,7 +2160,7 @@ Like `punycodeDecode` but returns an empty string if no valid Punycode-encoded s ## idnaEncode -Returns the the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. +Returns the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. The input string must be UTF-encoded and translatable to an ASCII string, otherwise an exception is thrown. Note: No percent decoding or trimming of tabs, spaces or control characters is performed. @@ -2039,7 +2172,7 @@ idnaEncode(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -2065,7 +2198,7 @@ Like `idnaEncode` but returns an empty string in case of an error instead of thr ## idnaDecode -Returns the the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. +Returns the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. In case of an error (e.g. because the input is invalid), the input string is returned. Note that repeated application of `idnaEncode()` and `idnaDecode()` does not necessarily return the original string due to case normalization. @@ -2077,7 +2210,7 @@ idnaDecode(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -2121,7 +2254,7 @@ Result: └───────────────────────────────────────────┘ ``` -Alias: mismatches +Alias: `mismatches` ## stringJaccardIndex @@ -2175,7 +2308,7 @@ Result: └─────────────────────────────────────┘ ``` -Alias: levenshteinDistance +Alias: `levenshteinDistance` ## editDistanceUTF8 @@ -2201,7 +2334,7 @@ Result: └─────────────────────────────────────┘ ``` -Alias: levenshteinDistanceUTF8 +Alias: `levenshteinDistanceUTF8` ## damerauLevenshteinDistance @@ -2279,13 +2412,93 @@ Result: Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. +:::note +Because `initCap` converts only the first letter of each word to upper case you may observe unexpected behaviour for words containing apostrophes or capital letters. For example: + +```sql +SELECT initCap('mother''s daughter'), initCap('joe McAdam'); +``` + +will return + +```response +┌─initCap('mother\'s daughter')─┬─initCap('joe McAdam')─┐ +│ Mother'S Daughter │ Joe Mcadam │ +└───────────────────────────────┴───────────────────────┘ +``` + +This is a known behaviour, with no plans currently to fix it. +::: + +**Syntax** + +```sql +initcap(val) +``` + +**Arguments** + +- `val` — Input value. [String](../data-types/string.md). + +**Returned value** + +- `val` with the first letter of each word converted to upper case. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT initcap('building for fast'); +``` + +Result: + +```text +┌─initcap('building for fast')─┐ +│ Building For Fast │ +└──────────────────────────────┘ +``` + ## initcapUTF8 -Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. - -Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). +Like [initcap](#initcap), `initcapUTF8` converts the first letter of each word to upper case and the rest to lower case. Assumes that the string contains valid UTF-8 encoded text. +If this assumption is violated, no exception is thrown and the result is undefined. +:::note +This function does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. +::: + +**Syntax** + +```sql +initcapUTF8(val) +``` + +**Arguments** + +- `val` — Input value. [String](../data-types/string.md). + +**Returned value** + +- `val` with the first letter of each word converted to upper case. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT initcapUTF8('не тормозит'); +``` + +Result: + +```text +┌─initcapUTF8('не тормозит')─┐ +│ Не Тормозит │ +└────────────────────────────┘ +``` ## firstLine @@ -2299,7 +2512,7 @@ firstLine(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 7aeb1f5b2a7..8793ebdd1a3 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -34,7 +34,7 @@ Alias: `replace`. Replaces the first occurrence of the substring matching the regular expression `pattern` (in [re2 syntax](https://github.com/google/re2/wiki/Syntax)) in `haystack` by the `replacement` string. -`replacement` can containing substitutions `\0-\9`. +`replacement` can contain substitutions `\0-\9`. Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match. To use a verbatim `\` character in the `pattern` or `replacement` strings, escape it using `\`. diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index 2cec1987c20..5169d4487ec 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -6,44 +6,122 @@ sidebar_label: Time Window # Time Window Functions -Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below: +Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with [WindowView](../statements/create/view.md/#window-view-experimental) are listed below: ## tumble A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`). +**Syntax** + ``` sql tumble(time_attr, interval [, timezone]) ``` **Arguments** -- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. -- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. +- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)). **Example** Query: ``` sql -SELECT tumble(now(), toIntervalDay('1')) +SELECT tumble(now(), toIntervalDay('1')); ``` Result: ``` text ┌─tumble(now(), toIntervalDay('1'))─────────────┐ -│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ +│ ('2024-07-04 00:00:00','2024-07-05 00:00:00') │ └───────────────────────────────────────────────┘ ``` +## tumbleStart + +Returns the inclusive lower bound of the corresponding [tumbling window](#tumble). + +**Syntax** + +``` sql +tumbleStart(time_attr, interval [, timezone]); +``` + +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT tumbleStart(now(), toIntervalDay('1')); +``` + +Result: + +```response +┌─tumbleStart(now(), toIntervalDay('1'))─┐ +│ 2024-07-04 00:00:00 │ +└────────────────────────────────────────┘ +``` + +## tumbleEnd + +Returns the exclusive upper bound of the corresponding [tumbling window](#tumble). + +**Syntax** + +``` sql +tumbleEnd(time_attr, interval [, timezone]); +``` + +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT tumbleEnd(now(), toIntervalDay('1')); +``` + +Result: + +```response +┌─tumbleEnd(now(), toIntervalDay('1'))─┐ +│ 2024-07-05 00:00:00 │ +└──────────────────────────────────────┘ +``` + ## hop -A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. +A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. ``` sql hop(time_attr, hop_interval, window_interval [, timezone]) @@ -51,65 +129,118 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Arguments** -- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. -- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. -- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. +- The inclusive lower and exclusive upper bound of the corresponding hopping window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: **Example** Query: ``` sql -SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) +SELECT hop(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); ``` Result: ``` text -┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ -│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ -└───────────────────────────────────────────────────────────┘ -``` - -## tumbleStart - -Returns the inclusive lower bound of the corresponding tumbling window. - -``` sql -tumbleStart(bounds_tuple); -tumbleStart(time_attr, interval [, timezone]); -``` - -## tumbleEnd - -Returns the exclusive upper bound of the corresponding tumbling window. - -``` sql -tumbleEnd(bounds_tuple); -tumbleEnd(time_attr, interval [, timezone]); +┌─hop(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ ('2024-07-03 00:00:00','2024-07-05 00:00:00') │ +└────────────────────────────────────────────────────┘ ``` ## hopStart -Returns the inclusive lower bound of the corresponding hopping window. +Returns the inclusive lower bound of the corresponding [hopping window](#hop). + +**Syntax** ``` sql -hopStart(bounds_tuple); hopStart(time_attr, hop_interval, window_interval [, timezone]); ``` +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The inclusive lower bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: + +**Example** + +Query: + +``` sql +SELECT hopStart(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); +``` + +Result: + +``` text +┌─hopStart(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ 2024-07-03 00:00:00 │ +└─────────────────────────────────────────────────────────┘ +``` ## hopEnd -Returns the exclusive upper bound of the corresponding hopping window. +Returns the exclusive upper bound of the corresponding [hopping window](#hop). + +**Syntax** ``` sql -hopEnd(bounds_tuple); hopEnd(time_attr, hop_interval, window_interval [, timezone]); +``` +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The exclusive upper bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: + +**Example** + +Query: + +``` sql +SELECT hopEnd(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); +``` + +Result: + +``` text +┌─hopEnd(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ 2024-07-05 00:00:00 │ +└───────────────────────────────────────────────────────┘ + ``` ## Related content diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 0663be08240..3b4d68e44b2 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -7,7 +7,7 @@ sidebar_label: Tuples ## tuple A function that allows grouping multiple columns. -For columns with the types T1, T2, ..., it returns a Tuple(T1, T2, ...) type tuple containing these columns. There is no cost to execute the function. +For columns C1, C2, ... with the types T1, T2, ..., it returns a named Tuple(C1 T1, C2 T2, ...) type tuple containing these columns if their names are unique and can be treated as unquoted identifiers, otherwise a Tuple(T1, T2, ...) is returned. There is no cost to execute the function. Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. The function implements the operator `(x, y, ...)`. @@ -259,6 +259,60 @@ Result: └───────────────────────────────────────┘ ``` +## tupleNames + +Converts a tuple into an array of column names. For a tuple in the form `Tuple(a T, b T, ...)`, it returns an array of strings representing the named columns of the tuple. If the tuple elements do not have explicit names, their indices will be used as the column names instead. + +**Syntax** + +``` sql +tupleNames(tuple) +``` + +**Arguments** + +- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. + +**Returned value** + +- An array with strings. + +Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). + +**Example** + +Query: + +``` sql +CREATE TABLE tupletest (col Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory; + +INSERT INTO tupletest VALUES (tuple(1, 2)); + +SELECT tupleNames(col) FROM tupletest; +``` + +Result: + +``` text +┌─tupleNames(col)──────────┐ +│ ['user_ID','session_ID'] │ +└──────────────────────────┘ +``` + +If you pass a simple tuple to the function, ClickHouse uses the indexes of the columns as their names: + +``` sql +SELECT tupleNames(tuple(3, 2, 1)); +``` + +Result: + +``` text +┌─tupleNames((3, 2, 1))─┐ +│ ['1','2','3'] │ +└───────────────────────┘ +``` + ## tuplePlus Calculates the sum of corresponding values of two tuples of the same size. diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index ad40725d680..24b356eca87 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -600,7 +600,7 @@ mapApply(func, map) **Arguments** -- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `func` — [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). - `map` — [Map](../data-types/map.md). **Returned value** @@ -831,7 +831,39 @@ SELECT mapSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; └──────────────────────────────┘ ``` -For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function. +For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function. + +## mapPartialSort + +Sorts the elements of a map in ascending order with additional `limit` argument allowing partial sorting. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + +**Syntax** + +```sql +mapPartialSort([func,] limit, map) +``` +**Arguments** + +- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md). +- `map` – Map to sort. [Map](../data-types/map.md). + +**Returned value** + +- Partially sorted map. [Map](../data-types/map.md). + +**Example** + +``` sql +SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2)); +``` + +``` text +┌─mapPartialSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐ +│ {'k2':1,'k3':2,'k1':3} │ +└───────────────────────────────────────────────────────────────────────────┘ +``` ## mapReverseSort(\[func,\], map) @@ -861,3 +893,35 @@ SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; ``` For more details see function [arrayReverseSort](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort). + +## mapPartialReverseSort + +Sorts the elements of a map in descending order with additional `limit` argument allowing partial sorting. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + +**Syntax** + +```sql +mapPartialReverseSort([func,] limit, map) +``` +**Arguments** + +- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md). +- `map` – Map to sort. [Map](../data-types/map.md). + +**Returned value** + +- Partially sorted map. [Map](../data-types/map.md). + +**Example** + +``` sql +SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2)); +``` + +``` text +┌─mapPartialReverseSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐ +│ {'k1':3,'k3':2,'k2':1} │ +└──────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index b665ad85a09..6e18ace10c7 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -58,6 +58,8 @@ KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90' KILL QUERY WHERE user='username' SYNC ``` +:::tip If you are killing a query in ClickHouse Cloud or in a self-managed cluster, then be sure to use the ```ON CLUSTER [cluster-name]``` option, in order to ensure the query is killed on all replicas::: + Read-only users can only stop their own queries. By default, the asynchronous version of queries is used (`ASYNC`), which does not wait for confirmation that queries have stopped. @@ -131,6 +133,7 @@ KILL MUTATION WHERE database = 'default' AND table = 'table' -- Cancel the specific mutation: KILL MUTATION WHERE database = 'default' AND table = 'table' AND mutation_id = 'mutation_3.txt' ``` +:::tip If you are killing a mutation in ClickHouse Cloud or in a self-managed cluster, then be sure to use the ```ON CLUSTER [cluster-name]``` option, in order to ensure the mutation is killed on all replicas::: The query is useful when a mutation is stuck and cannot finish (e.g. if some function in the mutation query throws an exception when applied to the data contained in the table). diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 3a3162dad9a..44b1b50620a 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -130,7 +130,9 @@ SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv'); ## Globs in path -Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix. +Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix. There is one exception that if the path refers to an existing +directory and does not use globs, a `*` will be implicitly added to the path so +all the files in the directory are selected. - `*` — Represents arbitrarily many characters except `/` but including the empty string. - `?` — Represents an arbitrary single character. @@ -163,6 +165,12 @@ An alternative path expression which achieves the same: SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` +Query the total number of rows in `some_dir` using the implicit `*`: + +```sql +SELECT count(*) FROM file('some_dir', 'TSV', 'name String, value UInt32'); +``` + :::note If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/sql-reference/window-functions/dense_rank.md b/docs/en/sql-reference/window-functions/dense_rank.md new file mode 100644 index 00000000000..d6445b68c55 --- /dev/null +++ b/docs/en/sql-reference/window-functions/dense_rank.md @@ -0,0 +1,73 @@ +--- +slug: /en/sql-reference/window-functions/dense_rank +sidebar_label: dense_rank +sidebar_position: 7 +--- + +# dense_rank + +Ranks the current row within its partition without gaps. In other words, if the value of any new row encountered is equal to the value of one of the previous rows then it will receive the next successive rank without any gaps in ranking. + +The [rank](./rank.md) function provides the same behaviour, but with gaps in ranking. + +**Syntax** + +```sql +dense_rank (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition, without gaps in ranking. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). + +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + dense_rank() OVER (ORDER BY salary DESC) AS dense_rank +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─dense_rank─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 1 │ +3. │ Charles Juarez │ 190000 │ 2 │ +4. │ Michael Stanley │ 150000 │ 3 │ +5. │ Douglas Benson │ 150000 │ 3 │ +6. │ Scott Harrison │ 150000 │ 3 │ +7. │ James Henderson │ 140000 │ 4 │ + └─────────────────┴────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/first_value.md b/docs/en/sql-reference/window-functions/first_value.md new file mode 100644 index 00000000000..30c3b1f99dc --- /dev/null +++ b/docs/en/sql-reference/window-functions/first_value.md @@ -0,0 +1,79 @@ +--- +slug: /en/sql-reference/window-functions/first_value +sidebar_label: first_value +sidebar_position: 3 +--- + +# first_value + +Returns the first value evaluated within its ordered frame. By default, NULL arguments are skipped, however the `RESPECT NULLS` modifier can be used to override this behaviour. + +**Syntax** + +```sql +first_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]] + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +Alias: `any`. + +:::note +Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped. +See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information. +::: + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- The first value evaluated within its ordered frame. + +**Example** + +In this example the `first_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + first_value(player) OVER (ORDER BY salary DESC) AS highest_paid_player +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─highest_paid_player─┐ +1. │ Gary Chen │ 196000 │ Gary Chen │ +2. │ Robert George │ 195000 │ Gary Chen │ +3. │ Charles Juarez │ 190000 │ Gary Chen │ +4. │ Scott Harrison │ 180000 │ Gary Chen │ +5. │ Douglas Benson │ 150000 │ Gary Chen │ +6. │ James Henderson │ 140000 │ Gary Chen │ +7. │ Michael Stanley │ 100000 │ Gary Chen │ + └─────────────────┴────────┴─────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 49076f3cbe1..0c3e2ea1cb6 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -1,10 +1,11 @@ --- slug: /en/sql-reference/window-functions/ -sidebar_position: 62 sidebar_label: Window Functions -title: Window Functions +sidebar_position: 1 --- +# Window Functions + Windows functions let you perform calculations across a set of rows that are related to the current row. Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned. @@ -12,8 +13,8 @@ Some of the calculations that you can do are similar to those that can be done w ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. -| Feature | Supported? | -|------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Feature | Supported? | +|--------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ | | expressions involving window functions, e.g. `(count(*) over ()) / 2)` | ✅ | | `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ | @@ -23,6 +24,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `GROUPS` frame | ❌ | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | `rank()`, `dense_rank()`, `row_number()` | ✅ | +| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`| | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | @@ -74,14 +76,14 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] These functions can be used only as a window function. -- `row_number()` - Number the current row within its partition starting from 1. -- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. -- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. -- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. -- `rank()` - Rank the current row within its partition with gaps. -- `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned. -- `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the specified default value is used. +- [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1. +- [`first_value(x)`](./first_value.md) - Return the first value evaluated within its ordered frame. +- [`last_value(x)`](./last_value.md) - Return the last value evaluated within its ordered frame. +- [`nth_value(x, offset)`](./nth_value.md) - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. +- [`rank()`](./rank.md) - Rank the current row within its partition with gaps. +- [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps. +- [`lagInFrame(x)`](./lagInFrame.md) - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +- [`leadInFrame(x)`](./leadInFrame.md) - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. ## Examples diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md new file mode 100644 index 00000000000..049e095c10f --- /dev/null +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -0,0 +1,79 @@ +--- +slug: /en/sql-reference/window-functions/lagInFrame +sidebar_label: lagInFrame +sidebar_position: 8 +--- + +# lagInFrame + +Returns a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. + +**Syntax** + +```sql +lagInFrame(x[, offset[, default]]) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** +- `x` — Column name. +- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default). +- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default). + +**Returned value** + +- Value evaluated at the row that is at a specified physical offset before the current row within the ordered frame. + +**Example** + +This example looks at historical data for a specific stock and uses the `lagInFrame` function to calculate a day-to-day delta and percentage change in the closing price of the stock. + +Query: + +```sql +CREATE TABLE stock_prices +( + `date` Date, + `open` Float32, -- opening price + `high` Float32, -- daily high + `low` Float32, -- daily low + `close` Float32, -- closing price + `volume` UInt32 -- trade volume +) +Engine = Memory; + +INSERT INTO stock_prices FORMAT Values + ('2024-06-03', 113.62, 115.00, 112.00, 115.00, 438392000), + ('2024-06-04', 115.72, 116.60, 114.04, 116.44, 403324000), + ('2024-06-05', 118.37, 122.45, 117.47, 122.44, 528402000), + ('2024-06-06', 124.05, 125.59, 118.32, 121.00, 664696000), + ('2024-06-07', 119.77, 121.69, 118.02, 120.89, 412386000); +``` + +```sql +SELECT + date, + close, + lagInFrame(close, 1, close) OVER (ORDER BY date ASC) AS previous_day_close, + COALESCE(ROUND(close - previous_day_close, 2)) AS delta, + COALESCE(ROUND((delta / previous_day_close) * 100, 2)) AS percent_change +FROM stock_prices +ORDER BY date DESC; +``` + +Result: + +```response + ┌───────date─┬──close─┬─previous_day_close─┬─delta─┬─percent_change─┐ +1. │ 2024-06-07 │ 120.89 │ 121 │ -0.11 │ -0.09 │ +2. │ 2024-06-06 │ 121 │ 122.44 │ -1.44 │ -1.18 │ +3. │ 2024-06-05 │ 122.44 │ 116.44 │ 6 │ 5.15 │ +4. │ 2024-06-04 │ 116.44 │ 115 │ 1.44 │ 1.25 │ +5. │ 2024-06-03 │ 115 │ 115 │ 0 │ 0 │ + └────────────┴────────┴────────────────────┴───────┴────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md new file mode 100644 index 00000000000..dd7f5fa078a --- /dev/null +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -0,0 +1,79 @@ +--- +slug: /en/sql-reference/window-functions/last_value +sidebar_label: last_value +sidebar_position: 4 +--- + +# last_value + +Returns the last value evaluated within its ordered frame. By default, NULL arguments are skipped, however the `RESPECT NULLS` modifier can be used to override this behaviour. + +**Syntax** + +```sql +last_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]] + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +Alias: `anyLast`. + +:::note +Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped. +See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information. +::: + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- The last value evaluated within its ordered frame. + +**Example** + +In this example the `last_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + last_value(player) OVER (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lowest_paid_player +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─lowest_paid_player─┐ +1. │ Gary Chen │ 196000 │ Michael Stanley │ +2. │ Robert George │ 195000 │ Michael Stanley │ +3. │ Charles Juarez │ 190000 │ Michael Stanley │ +4. │ Scott Harrison │ 180000 │ Michael Stanley │ +5. │ Douglas Benson │ 150000 │ Michael Stanley │ +6. │ James Henderson │ 140000 │ Michael Stanley │ +7. │ Michael Stanley │ 100000 │ Michael Stanley │ + └─────────────────┴────────┴────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md new file mode 100644 index 00000000000..fc1b92cc266 --- /dev/null +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -0,0 +1,60 @@ +--- +slug: /en/sql-reference/window-functions/leadInFrame +sidebar_label: leadInFrame +sidebar_position: 9 +--- + +# leadInFrame + +Returns a value evaluated at the row that is offset rows after the current row within the ordered frame. + +**Syntax** + +```sql +leadInFrame(x[, offset[, default]]) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** +- `x` — Column name. +- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default). +- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default). + +**Returned value** + +- value evaluated at the row that is offset rows after the current row within the ordered frame. + +**Example** + +This example looks at [historical data](https://www.kaggle.com/datasets/sazidthe1/nobel-prize-data) for Nobel Prize winners and uses the `leadInFrame` function to return a list of successive winners in the physics category. + +Query: + +```sql +CREATE OR REPLACE VIEW nobel_prize_laureates AS FROM file('nobel_laureates_data.csv') SELECT *; +``` + +```sql +FROM nobel_prize_laureates SELECT fullName, leadInFrame(year, 1, year) OVER (PARTITION BY category ORDER BY year) AS year, category, motivation WHERE category == 'physics' ORDER BY year DESC LIMIT 9; +``` + +Result: + +```response + ┌─fullName─────────┬─year─┬─category─┬─motivation─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +1. │ Pierre Agostini │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +2. │ Ferenc Krausz │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +3. │ Anne L Huillier │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +4. │ Alain Aspect │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +5. │ Anton Zeilinger │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +6. │ John Clauser │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +7. │ Syukuro Manabe │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ +8. │ Klaus Hasselmann │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ +9. │ Giorgio Parisi │ 2021 │ physics │ for the discovery of the interplay of disorder and fluctuations in physical systems from atomic to planetary scales │ + └──────────────────┴──────┴──────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/nth_value.md b/docs/en/sql-reference/window-functions/nth_value.md new file mode 100644 index 00000000000..aa5baf651a8 --- /dev/null +++ b/docs/en/sql-reference/window-functions/nth_value.md @@ -0,0 +1,75 @@ +--- +slug: /en/sql-reference/window-functions/nth_value +sidebar_label: nth_value +sidebar_position: 5 +--- + +# nth_value + +Returns the first non-NULL value evaluated against the nth row (offset) in its ordered frame. + +**Syntax** + +```sql +nth_value (x, offset) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** + +- `x` — Column name. +- `offset` — nth row to evaluate current row against. + +**Returned value** + +- The first non-NULL value evaluated against the nth row (offset) in its ordered frame. + +**Example** + +In this example the `nth-value` function is used to find the third-highest salary from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, nth_value(player,3) OVER(ORDER BY salary DESC) AS third_highest_salary FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─third_highest_salary─┐ +1. │ Gary Chen │ 195000 │ │ +2. │ Robert George │ 195000 │ │ +3. │ Charles Juarez │ 190000 │ Charles Juarez │ +4. │ Scott Harrison │ 180000 │ Charles Juarez │ +5. │ Douglas Benson │ 150000 │ Charles Juarez │ +6. │ James Henderson │ 140000 │ Charles Juarez │ +7. │ Michael Stanley │ 100000 │ Charles Juarez │ + └─────────────────┴────────┴──────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/rank.md b/docs/en/sql-reference/window-functions/rank.md new file mode 100644 index 00000000000..dff5e154151 --- /dev/null +++ b/docs/en/sql-reference/window-functions/rank.md @@ -0,0 +1,74 @@ +--- +slug: /en/sql-reference/window-functions/rank +sidebar_label: rank +sidebar_position: 6 +--- + +# rank + +Ranks the current row within its partition with gaps. In other words, if the value of any row it encounters is equal to the value of a previous row then it will receive the same rank as that previous row. +The rank of the next row is then equal to the rank of the previous row plus a gap equal to the number of times the previous rank was given. + +The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking. + +**Syntax** + +```sql +rank (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition, including gaps. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). + +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + rank() OVER (ORDER BY salary DESC) AS rank +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─rank─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 1 │ +3. │ Charles Juarez │ 190000 │ 3 │ +4. │ Douglas Benson │ 150000 │ 4 │ +5. │ Michael Stanley │ 150000 │ 4 │ +6. │ Scott Harrison │ 150000 │ 4 │ +7. │ James Henderson │ 140000 │ 7 │ + └─────────────────┴────────┴──────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md new file mode 100644 index 00000000000..f1c331f89a3 --- /dev/null +++ b/docs/en/sql-reference/window-functions/row_number.md @@ -0,0 +1,67 @@ +--- +slug: /en/sql-reference/window-functions/row_number +sidebar_label: row_number +sidebar_position: 2 +--- + +# row_number + +Numbers the current row within its partition starting from 1. + +**Syntax** + +```sql +row_number (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). + +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'); +``` + +```sql +SELECT player, salary, + row_number() OVER (ORDER BY salary DESC) AS row_number +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─row_number─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 2 │ +3. │ Charles Juarez │ 190000 │ 3 │ +4. │ Scott Harrison │ 150000 │ 4 │ +5. │ Michael Stanley │ 150000 │ 5 │ + └─────────────────┴────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/ru/introduction/distinctive-features.md b/docs/ru/introduction/distinctive-features.md index dafaf055980..da820c90a1e 100644 --- a/docs/ru/introduction/distinctive-features.md +++ b/docs/ru/introduction/distinctive-features.md @@ -12,7 +12,7 @@ sidebar_label: "Отличительные возможности ClickHouse" Этот пункт пришлось выделить, так как существуют системы, которые могут хранить значения отдельных столбцов по отдельности, но не могут эффективно выполнять аналитические запросы в силу оптимизации под другой сценарий работы. Примеры: HBase, BigTable, Cassandra, HyperTable. В этих системах вы получите пропускную способность в районе сотен тысяч строк в секунду, но не сотен миллионов строк в секунду. -Также стоит заметить, что ClickHouse является системой управления базами данных, а не системой для одной базой данных. То есть, ClickHouse позволяет создавать таблицы и базы данных во время выполнения (runtime), загружать данные и выполнять запросы без переконфигурирования и перезапуска сервера. +Также стоит заметить, что ClickHouse является системой управления базами данных, а не системой для одной базы данных. То есть, ClickHouse позволяет создавать таблицы и базы данных во время выполнения (runtime), загружать данные и выполнять запросы без переконфигурирования и перезапуска сервера. ## Сжатие данных {#szhatie-dannykh} diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 6343dc85d00..0126613f797 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1117,6 +1117,7 @@ void Client::processOptions(const OptionsDescription & options_description, if (!options["user"].defaulted()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User and JWT flags can't be specified together"); config().setString("jwt", options["jwt"].as()); + config().setString("user", ""); } if (options.count("accept-invalid-certificate")) { diff --git a/programs/disks/CMakeLists.txt b/programs/disks/CMakeLists.txt index f0949fcfceb..7e8afe084fb 100644 --- a/programs/disks/CMakeLists.txt +++ b/programs/disks/CMakeLists.txt @@ -1,6 +1,8 @@ set (CLICKHOUSE_DISKS_SOURCES DisksApp.cpp + DisksClient.cpp ICommand.cpp + CommandChangeDirectory.cpp CommandCopy.cpp CommandLink.cpp CommandList.cpp @@ -9,10 +11,14 @@ set (CLICKHOUSE_DISKS_SOURCES CommandMove.cpp CommandRead.cpp CommandRemove.cpp - CommandWrite.cpp) + CommandSwitchDisk.cpp + CommandWrite.cpp + CommandHelp.cpp + CommandTouch.cpp + CommandGetCurrentDiskAndPath.cpp) if (CLICKHOUSE_CLOUD) - set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) + set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) endif () set (CLICKHOUSE_DISKS_LINK diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp new file mode 100644 index 00000000000..b545f37de72 --- /dev/null +++ b/programs/disks/CommandChangeDirectory.cpp @@ -0,0 +1,35 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandChangeDirectory final : public ICommand +{ +public: + explicit CommandChangeDirectory() : ICommand() + { + command_name = "cd"; + description = "Change directory (makes sense only in interactive mode)"; + options_description.add_options()("path", po::value(), "the path to which we want to change (mandatory, positional)")( + "disk", po::value(), "A disk where the path is changed (without disk switching)"); + positional_options_description.add("path", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + DiskWithPath & disk = getDiskWithPath(client, options, "disk"); + String path = getValueFromCommandLineOptionsThrow(options, "path"); + disk.setPath(path); + } +}; + +CommandPtr makeCommandChangeDirectory() +{ + return std::make_shared(); +} + +} diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index f176fa277d7..e3051f2702c 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -1,6 +1,8 @@ -#include "ICommand.h" #include +#include "Common/Exception.h" #include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { @@ -10,59 +12,89 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandCopy final : public ICommand { public: - CommandCopy() + explicit CommandCopy() : ICommand() { command_name = "copy"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Recursively copy data from `FROM_PATH` to `TO_PATH`"; - usage = "copy [OPTION]... "; - command_option_description->add_options() - ("disk-from", po::value(), "disk from which we copy") - ("disk-to", po::value(), "disk to which we copy"); + description = "Recursively copy data from `path-from` to `path-to`"; + options_description.add_options()( + "disk-from", po::value(), "disk from which we copy is executed (default value is a current disk)")( + "disk-to", po::value(), "disk to which copy is executed (default value is a current disk)")( + "path-from", po::value(), "path from which copy is executed (mandatory, positional)")( + "path-to", po::value(), "path to which copy is executed (mandatory, positional)")( + "recursive,r", "recursively copy the directory (required to remove a directory)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("disk-from")) - config.setString("disk-from", options["disk-from"].as()); - if (options.count("disk-to")) - config.setString("disk-to", options["disk-to"].as()); - } + auto disk_from = getDiskWithPath(client, options, "disk-from"); + auto disk_to = getDiskWithPath(client, options, "disk-to"); + String path_from = disk_from.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk_to.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + bool recursive = options.count("recursive"); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) + if (!disk_from.getDisk()->exists(path_from)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "cannot stat '{}' on disk '{}': No such file or directory", + path_from, + disk_from.getDisk()->getName()); } + else if (disk_from.getDisk()->isFile(path_from)) + { + auto target_location = getTargetLocation(path_from, disk_to, path_to); + if (!disk_to.getDisk()->exists(target_location) || disk_to.getDisk()->isFile(target_location)) + { + disk_from.getDisk()->copyFile( + path_from, + *disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite directory {} with non-directory {}", target_location, path_from); + } + } + else if (disk_from.getDisk()->isDirectory(path_from)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--recursive not specified; omitting directory {}", path_from); + } + auto target_location = getTargetLocation(path_from, disk_to, path_to); - String disk_name_from = config.getString("disk-from", config.getString("disk", "default")); - String disk_name_to = config.getString("disk-to", config.getString("disk", "default")); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk_from = disk_selector->get(disk_name_from); - DiskPtr disk_to = disk_selector->get(disk_name_to); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to, /* read_settings= */ {}, /* write_settings= */ {}, /* cancellation_hook= */ {}); + if (disk_to.getDisk()->isFile(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory {} with directory {}", path_to, target_location); + } + else if (!disk_to.getDisk()->exists(target_location)) + { + disk_to.getDisk()->createDirectory(target_location); + } + disk_from.getDisk()->copyDirectoryContent( + path_from, + disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } } }; + +CommandPtr makeCommandCopy() +{ + return std::make_shared(); } -std::unique_ptr makeCommandCopy() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandGetCurrentDiskAndPath.cpp b/programs/disks/CommandGetCurrentDiskAndPath.cpp new file mode 100644 index 00000000000..15f8ef5aae8 --- /dev/null +++ b/programs/disks/CommandGetCurrentDiskAndPath.cpp @@ -0,0 +1,30 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandGetCurrentDiskAndPath final : public ICommand +{ +public: + explicit CommandGetCurrentDiskAndPath() : ICommand() + { + command_name = "current_disk_with_path"; + description = "Prints current disk and path (which coincide with the prompt)"; + } + + void executeImpl(const CommandLineOptions &, DisksClient & client) override + { + auto disk = client.getCurrentDiskWithPath(); + std::cout << "Disk: " << disk.getDisk()->getName() << "\nPath: " << disk.getCurrentPath() << std::endl; + } +}; + +CommandPtr makeCommandGetCurrentDiskAndPath() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandHelp.cpp b/programs/disks/CommandHelp.cpp new file mode 100644 index 00000000000..a3aee9498d3 --- /dev/null +++ b/programs/disks/CommandHelp.cpp @@ -0,0 +1,43 @@ +#include "DisksApp.h" +#include "ICommand.h" + +#include +#include + +namespace DB +{ + +class CommandHelp final : public ICommand +{ +public: + explicit CommandHelp(const DisksApp & disks_app_) : disks_app(disks_app_) + { + command_name = "help"; + description = "Print help message about available commands"; + options_description.add_options()( + "command", po::value(), "A command to help with (optional, positional), if not specified, help lists all the commands"); + positional_options_description.add("command", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & /*client*/) override + { + std::optional command = getValueFromCommandLineOptionsWithOptional(options, "command"); + if (command.has_value()) + { + disks_app.printCommandHelpMessage(command.value()); + } + else + { + disks_app.printAvailableCommandsHelpMessage(); + } + } + + const DisksApp & disks_app; +}; + +CommandPtr makeCommandHelp(const DisksApp & disks_app) +{ + return std::make_shared(disks_app); +} + +} diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index dbaa3162f82..11c196cafc5 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -1,14 +1,9 @@ -#include "ICommand.h" #include +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandLink final : public ICommand { public: @@ -16,42 +11,27 @@ public: { command_name = "link"; description = "Create hardlink from `from_path` to `to_path`"; - usage = "link [OPTION]... "; + options_description.add_options()( + "path-from", po::value(), "the path from which a hard link will be created (mandatory, positional)")( + "path-to", po::value(), "the path where a hard link will be created (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - } + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } + const String & path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + const String & path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); - String disk_name = config.getString("disk", "default"); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk->createHardLink(relative_path_from, relative_path_to); + disk.getDisk()->createHardLink(path_from, path_to); } }; + +CommandPtr makeCommandLink() +{ + return std::make_shared(); } -std::unique_ptr makeCommandLink() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 7213802ea86..77479b1d217 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -1,98 +1,95 @@ -#include "ICommand.h" #include #include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandList final : public ICommand { public: - CommandList() + explicit CommandList() : ICommand() { command_name = "list"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "List files at path[s]"; - usage = "list [OPTION]... ..."; - command_option_description->add_options() - ("recursive", "recursively list all directories"); + options_description.add_options()("recursive", "recursively list the directory")("all", "show hidden files")( + "path", po::value(), "the path of listing (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("recursive")) - config.setBool("recursive", true); - } - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - bool recursive = config.getBool("recursive", false); + bool recursive = options.count("recursive"); + bool show_hidden = options.count("all"); + auto disk = client.getCurrentDiskWithPath(); + String path = getValueFromCommandLineOptionsWithDefault(options, "path", "."); if (recursive) - listRecursive(disk, relative_path); + listRecursive(disk, path, show_hidden); else - list(disk, relative_path); + list(disk, path, show_hidden); } private: - static void list(const DiskPtr & disk, const std::string & relative_path) + static void list(const DiskWithPath & disk, const std::string & path, bool show_hidden) { - std::vector file_names; - disk->listFiles(relative_path, file_names); + std::vector file_names = disk.listAllFilesByPath(path); + std::vector selected_and_sorted_file_names{}; for (const auto & file_name : file_names) - std::cout << file_name << '\n'; + if (show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); + + std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); + for (const auto & file_name : selected_and_sorted_file_names) + { + std::cout << file_name << "\n"; + } } - static void listRecursive(const DiskPtr & disk, const std::string & relative_path) + static void listRecursive(const DiskWithPath & disk, const std::string & relative_path, bool show_hidden) { - std::vector file_names; - disk->listFiles(relative_path, file_names); + std::vector file_names = disk.listAllFilesByPath(relative_path); + std::vector selected_and_sorted_file_names{}; std::cout << relative_path << ":\n"; - if (!file_names.empty()) - { - for (const auto & file_name : file_names) - std::cout << file_name << '\n'; - std::cout << "\n"; - } - for (const auto & file_name : file_names) + if (show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); + + std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); + for (const auto & file_name : selected_and_sorted_file_names) { - auto path = relative_path.empty() ? file_name : (relative_path + "/" + file_name); - if (disk->isDirectory(path)) - listRecursive(disk, path); + std::cout << file_name << "\n"; + } + std::cout << "\n"; + + for (const auto & file_name : selected_and_sorted_file_names) + { + auto path = [&]() -> String + { + if (relative_path.ends_with("/")) + { + return relative_path + file_name; + } + else + { + return relative_path + "/" + file_name; + } + }(); + if (disk.isDirectory(path)) + { + listRecursive(disk, path, show_hidden); + } } } }; -} -std::unique_ptr makeCommandList() +CommandPtr makeCommandList() { - return std::make_unique(); + return std::make_shared(); +} } diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index 79da021fd00..9fb67fed5e0 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -1,68 +1,40 @@ -#include "ICommand.h" +#include #include +#include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandListDisks final : public ICommand { public: - CommandListDisks() + explicit CommandListDisks() : ICommand() { command_name = "list-disks"; - description = "List disks names"; - usage = "list-disks [OPTION]"; + description = "Lists all available disks"; } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr &, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions &, DisksClient & client) override { - if (!command_arguments.empty()) + std::vector sorted_and_selected{}; + for (const auto & disk_name : client.getAllDiskNames()) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + sorted_and_selected.push_back(disk_name + ":" + client.getDiskWithPath(disk_name).getAbsolutePath("")); } - - constexpr auto config_prefix = "storage_configuration.disks"; - constexpr auto default_disk_name = "default"; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_prefix, keys); - - bool has_default_disk = false; - - /// For the output to be ordered - std::set disks; - - for (const auto & disk_name : keys) + std::sort(sorted_and_selected.begin(), sorted_and_selected.end()); + for (const auto & disk_name : sorted_and_selected) { - if (disk_name == default_disk_name) - has_default_disk = true; - disks.insert(disk_name); + std::cout << disk_name << "\n"; } - - if (!has_default_disk) - disks.insert(default_disk_name); - - for (const auto & disk : disks) - std::cout << disk << '\n'; } -}; -} -std::unique_ptr makeCommandListDisks() +private: +}; + +CommandPtr makeCommandListDisks() { - return std::make_unique(); + return std::make_shared(); +} } diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index 6d33bdec498..c6222f326d4 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -6,61 +6,35 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandMkDir final : public ICommand { public: CommandMkDir() { command_name = "mkdir"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Create a directory"; - usage = "mkdir [OPTION]... "; - command_option_description->add_options() - ("recursive", "recursively create directories"); + description = "Creates a directory"; + options_description.add_options()("parents", "recursively create directories")( + "path", po::value(), "the path on which directory should be created (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("recursive")) - config.setBool("recursive", true); - } + bool recursive = options.count("parents"); + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - bool recursive = config.getBool("recursive", false); + String path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); if (recursive) - disk->createDirectories(relative_path); + disk.getDisk()->createDirectories(path); else - disk->createDirectory(relative_path); + disk.getDisk()->createDirectory(path); } }; + +CommandPtr makeCommandMkDir() +{ + return std::make_shared(); } -std::unique_ptr makeCommandMkDir() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 75cf96252ed..e3d485032e0 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -1,5 +1,5 @@ -#include "ICommand.h" #include +#include "ICommand.h" namespace DB { @@ -9,6 +9,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandMove final : public ICommand { public: @@ -16,44 +17,62 @@ public: { command_name = "move"; description = "Move file or directory from `from_path` to `to_path`"; - usage = "move [OPTION]... "; + options_description.add_options()("path-from", po::value(), "path from which we copy (mandatory, positional)")( + "path-to", po::value(), "path to which we copy (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 2) + auto disk = client.getCurrentDiskWithPath(); + + String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + + if (disk.getDisk()->isFile(path_from)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + disk.getDisk()->moveFile(path_from, path_to); + } + else if (disk.getDisk()->isDirectory(path_from)) + { + auto target_location = getTargetLocation(path_from, disk, path_to); + if (!disk.getDisk()->exists(target_location)) + { + disk.getDisk()->createDirectory(target_location); + disk.getDisk()->moveDirectory(path_from, target_location); + } + else + { + if (disk.getDisk()->isFile(target_location)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory '{}' with directory '{}'", target_location, path_from); + } + if (!disk.getDisk()->isDirectoryEmpty(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot move '{}' to '{}': Directory not empty", path_from, target_location); + } + else + { + disk.getDisk()->moveDirectory(path_from, target_location); + } + } + } + else if (!disk.getDisk()->exists(path_from)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "cannot stat '{}' on disk: '{}': No such file or directory", + path_from, + disk.getDisk()->getName()); } - - String disk_name = config.getString("disk", "default"); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - if (disk->isFile(relative_path_from)) - disk->moveFile(relative_path_from, relative_path_to); - else - disk->moveDirectory(relative_path_from, relative_path_to); } }; + +CommandPtr makeCommandMove() +{ + return std::make_shared(); } -std::unique_ptr makeCommandMove() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 0f3ac7ab98c..277e735f507 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -1,78 +1,52 @@ -#include "ICommand.h" -#include #include #include #include +#include #include +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandRead final : public ICommand { public: CommandRead() { command_name = "read"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Read a file from `FROM_PATH` to `TO_PATH`"; - usage = "read [OPTION]... []"; - command_option_description->add_options() - ("output", po::value(), "file to which we are reading, defaults to `stdout`"); + description = "Read a file from `path-from` to `path-to`"; + options_description.add_options()("path-from", po::value(), "file from which we are reading (mandatory, positional)")( + "path-to", po::value(), "file to which we are writing, defaults to `stdout`"); + positional_options_description.add("path-from", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("output")) - config.setString("output", options["output"].as()); - } + auto disk = client.getCurrentDiskWithPath(); + String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + std::optional path_to = getValueFromCommandLineOptionsWithOptional(options, "path-to"); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) + auto in = disk.getDisk()->readFile(path_from); + std::unique_ptr out = {}; + if (path_to.has_value()) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(command_arguments[0]); - - String path_output = config.getString("output", ""); - - if (!path_output.empty()) - { - String relative_path_output = validatePathAndGetAsRelative(path_output); - - auto in = disk->readFile(relative_path); - auto out = disk->writeFile(relative_path_output); + String relative_path_to = disk.getRelativeFromRoot(path_to.value()); + out = disk.getDisk()->writeFile(relative_path_to); copyData(*in, *out); - out->finalize(); } else { - auto in = disk->readFile(relative_path); - std::unique_ptr out = std::make_unique(STDOUT_FILENO); + out = std::make_unique(STDOUT_FILENO); copyData(*in, *out); + out->write('\n'); } + out->finalize(); } }; + +CommandPtr makeCommandRead() +{ + return std::make_shared(); } -std::unique_ptr makeCommandRead() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index 0c631eacff3..4388f6c0b14 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -1,5 +1,6 @@ -#include "ICommand.h" #include +#include "Common/Exception.h" +#include "ICommand.h" namespace DB { @@ -9,46 +10,49 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandRemove final : public ICommand { public: CommandRemove() { command_name = "remove"; - description = "Remove file or directory with all children. Throws exception if file doesn't exists.\nPath should be in format './' or './path' or 'path'"; - usage = "remove [OPTION]... "; + description = "Remove file or directory. Throws exception if file doesn't exists"; + options_description.add_options()("path", po::value(), "path that is going to be deleted (mandatory, positional)")( + "recursive,r", "recursively removes the directory (required to remove a directory)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 1) + auto disk = client.getCurrentDiskWithPath(); + const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); + bool recursive = options.count("recursive"); + if (!disk.getDisk()->exists(path)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} on disk {} doesn't exist", path, disk.getDisk()->getName()); + } + else if (disk.getDisk()->isDirectory(path)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot remove '{}': Is a directory", path); + } + else + { + disk.getDisk()->removeRecursive(path); + } + } + else + { + disk.getDisk()->removeFileIfExists(path); } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - disk->removeRecursive(relative_path); } }; + +CommandPtr makeCommandRemove() +{ + return std::make_shared(); } -std::unique_ptr makeCommandRemove() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp new file mode 100644 index 00000000000..fa02d991365 --- /dev/null +++ b/programs/disks/CommandSwitchDisk.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include "DisksApp.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandSwitchDisk final : public ICommand +{ +public: + explicit CommandSwitchDisk() : ICommand() + { + command_name = "switch-disk"; + description = "Switch disk (makes sense only in interactive mode)"; + options_description.add_options()("disk", po::value(), "the disk to switch to (mandatory, positional)")( + "path", po::value(), "the path to switch on the disk"); + positional_options_description.add("disk", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + String disk = getValueFromCommandLineOptions(options, "disk"); + std::optional path = getValueFromCommandLineOptionsWithOptional(options, "path"); + + client.switchToDisk(disk, path); + } +}; + +CommandPtr makeCommandSwitchDisk() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandTouch.cpp b/programs/disks/CommandTouch.cpp new file mode 100644 index 00000000000..c0bdb64cf9e --- /dev/null +++ b/programs/disks/CommandTouch.cpp @@ -0,0 +1,34 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandTouch final : public ICommand +{ +public: + explicit CommandTouch() : ICommand() + { + command_name = "touch"; + description = "Create a file by path"; + options_description.add_options()("path", po::value(), "the path of listing (mandatory, positional)"); + positional_options_description.add("path", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + auto disk = client.getCurrentDiskWithPath(); + String path = getValueFromCommandLineOptionsThrow(options, "path"); + + disk.getDisk()->createFile(disk.getRelativeFromRoot(path)); + } +}; + +CommandPtr makeCommandTouch() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index 7ded37e067a..9c82132e284 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -1,79 +1,57 @@ -#include "ICommand.h" #include +#include "ICommand.h" -#include #include #include #include +#include namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandWrite final : public ICommand { public: CommandWrite() { command_name = "write"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Write a file from `FROM_PATH` to `TO_PATH`"; - usage = "write [OPTION]... [] "; - command_option_description->add_options() - ("input", po::value(), "file from which we are reading, defaults to `stdin`"); + description = "Write a file from `path-from` to `path-to`"; + options_description.add_options()("path-from", po::value(), "file from which we are reading, defaults to `stdin` (input from `stdin` is finished by Ctrl+D)")( + "path-to", po::value(), "file to which we are writing (mandatory, positional)"); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("input")) - config.setString("input", options["input"].as()); - } + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) + std::optional path_from = getValueFromCommandLineOptionsWithOptional(options, "path-from"); + + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + + auto in = [&]() -> std::unique_ptr { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } + if (!path_from.has_value()) + { + return std::make_unique(STDIN_FILENO); + } + else + { + String relative_path_from = disk.getRelativeFromRoot(path_from.value()); + return disk.getDisk()->readFile(relative_path_from); + } + }(); - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - String path_input = config.getString("input", ""); - std::unique_ptr in; - if (path_input.empty()) - { - in = std::make_unique(STDIN_FILENO); - } - else - { - String relative_path_input = validatePathAndGetAsRelative(path_input); - in = disk->readFile(relative_path_input); - } - - auto out = disk->writeFile(relative_path); + auto out = disk.getDisk()->writeFile(path_to); copyData(*in, *out); out->finalize(); } }; + +CommandPtr makeCommandWrite() +{ + return std::make_shared(); } -std::unique_ptr makeCommandWrite() -{ - return std::make_unique(); } diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 5da5ab4bae9..59ba45b9451 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -1,11 +1,22 @@ #include "DisksApp.h" +#include +#include +#include "Common/Exception.h" +#include "Common/filesystemHelpers.h" +#include +#include "DisksClient.h" #include "ICommand.h" +#include "ICommand_fwd.h" + +#include +#include +#include +#include #include -#include #include - +#include namespace DB { @@ -13,74 +24,289 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +}; + +LineReader::Patterns DisksApp::query_extenders = {"\\"}; +LineReader::Patterns DisksApp::query_delimiters = {""}; +String DisksApp::word_break_characters = " \t\v\f\a\b\r\n"; + +CommandPtr DisksApp::getCommandByName(const String & command) const +{ + try + { + if (auto it = aliases.find(command); it != aliases.end()) + return command_descriptions.at(it->second); + + return command_descriptions.at(command); + } + catch (std::out_of_range &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The command `{}` is unknown", command); + } } -size_t DisksApp::findCommandPos(std::vector & common_arguments) +std::vector DisksApp::getEmptyCompletion(String command_name) const { - for (size_t i = 0; i < common_arguments.size(); i++) - if (supported_commands.contains(common_arguments[i])) - return i + 1; - return common_arguments.size(); + auto command_ptr = command_descriptions.at(command_name); + std::vector answer{}; + if (multidisk_commands.contains(command_ptr->command_name)) + { + answer = client->getAllFilesByPatternFromAllDisks(""); + } + else + { + answer = client->getCurrentDiskWithPath().getAllFilesByPattern(""); + } + for (const auto & disk_name : client->getAllDiskNames()) + { + answer.push_back(disk_name); + } + for (const auto & option : command_ptr->options_description.options()) + { + answer.push_back("--" + option->long_name()); + } + if (command_name == "help") + { + for (const auto & [current_command_name, description] : command_descriptions) + { + answer.push_back(current_command_name); + } + } + std::sort(answer.begin(), answer.end()); + return answer; } -void DisksApp::printHelpMessage(ProgramOptionsDescription & command_option_description) +std::vector DisksApp::getCommandsToComplete(const String & command_prefix) const { - std::optional help_description = - createOptionsDescription("Help Message for clickhouse-disks", getTerminalWidth()); - - help_description->add(command_option_description); - - std::cout << "ClickHouse disk management tool\n"; - std::cout << "Usage: ./clickhouse-disks [OPTION]\n"; - std::cout << "clickhouse-disks\n\n"; - - for (const auto & current_command : supported_commands) - std::cout << command_descriptions[current_command]->command_name - << "\t" - << command_descriptions[current_command]->description - << "\n\n"; - - std::cout << command_option_description << '\n'; + std::vector answer{}; + for (const auto & [word, _] : command_descriptions) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + for (const auto & [word, _] : aliases) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + return {command_prefix}; } -String DisksApp::getDefaultConfigFileName() +std::vector DisksApp::getCompletions(const String & prefix) const { - return "/etc/clickhouse-server/config.xml"; + auto arguments = po::split_unix(prefix, word_break_characters); + if (arguments.empty()) + { + return {}; + } + if (word_break_characters.contains(prefix.back())) + { + CommandPtr command; + try + { + command = getCommandByName(arguments[0]); + } + catch (...) + { + return {arguments.back()}; + } + return getEmptyCompletion(command->command_name); + } + else if (arguments.size() == 1) + { + String command_prefix = arguments[0]; + return getCommandsToComplete(command_prefix); + } + else + { + String last_token = arguments.back(); + CommandPtr command; + try + { + command = getCommandByName(arguments[0]); + } + catch (...) + { + return {last_token}; + } + std::vector answer = {}; + if (command->command_name == "help") + { + return getCommandsToComplete(last_token); + } + else + { + answer = [&]() -> std::vector + { + if (multidisk_commands.contains(command->command_name)) + { + return client->getAllFilesByPatternFromAllDisks(last_token); + } + else + { + return client->getCurrentDiskWithPath().getAllFilesByPattern(last_token); + } + }(); + + for (const auto & disk_name : client->getAllDiskNames()) + { + if (disk_name.starts_with(last_token)) + { + answer.push_back(disk_name); + } + } + for (const auto & option : command->options_description.options()) + { + String option_sign = "--" + option->long_name(); + if (option_sign.starts_with(last_token)) + { + answer.push_back(option_sign); + } + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + else + { + return {last_token}; + } + } } -void DisksApp::addOptions( - ProgramOptionsDescription & options_description_, - boost::program_options::positional_options_description & positional_options_description -) +bool DisksApp::processQueryText(const String & text) { - options_description_.add_options() - ("help,h", "Print common help message") - ("config-file,C", po::value(), "Set config file") - ("disk", po::value(), "Set disk name") - ("command_name", po::value(), "Name for command to do") - ("save-logs", "Save logs to a file") - ("log-level", po::value(), "Logging level") - ; + if (text.find_first_not_of(word_break_characters) == std::string::npos) + { + return true; + } + if (exit_strings.find(text) != exit_strings.end()) + return false; + CommandPtr command; + try + { + auto arguments = po::split_unix(text, word_break_characters); + command = getCommandByName(arguments[0]); + arguments.erase(arguments.begin()); + command->execute(arguments, *client); + } + catch (DB::Exception & err) + { + int code = getCurrentExceptionCode(); + if (code == ErrorCodes::LOGICAL_ERROR) + { + throw std::move(err); + } + else if (code == ErrorCodes::BAD_ARGUMENTS) + { + std::cerr << err.message() << "\n" + << "\n"; + if (command.get()) + { + std::cerr << "COMMAND: " << command->command_name << "\n"; + std::cerr << command->options_description << "\n"; + } + else + { + printAvailableCommandsHelpMessage(); + } + } + else + { + std::cerr << err.message() << "\n"; + } + } + catch (std::exception & err) + { + std::cerr << err.what() << "\n"; + } - positional_options_description.add("command_name", 1); + return true; +} - supported_commands = {"list-disks", "list", "move", "remove", "link", "copy", "write", "read", "mkdir"}; -#ifdef CLICKHOUSE_CLOUD - supported_commands.insert("packed-io"); -#endif +void DisksApp::runInteractiveReplxx() +{ + ReplxxLineReader lr( + suggest, + history_file, + /* multiline= */ false, + query_extenders, + query_delimiters, + word_break_characters.c_str(), + /* highlighter_= */ {}); + lr.enableBracketedPaste(); + + while (true) + { + DiskWithPath disk_with_path = client->getCurrentDiskWithPath(); + String prompt = "\x1b[1;34m" + disk_with_path.getDisk()->getName() + "\x1b[0m:" + "\x1b[1;31m" + disk_with_path.getCurrentPath() + + "\x1b[0m$ "; + + auto input = lr.readLine(prompt, "\x1b[1;31m:-] \x1b[0m"); + if (input.empty()) + break; + + if (!processQueryText(input)) + break; + } +} + +void DisksApp::parseAndCheckOptions( + const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options) +{ + auto parser = po::command_line_parser(arguments).options(options_description).allow_unregistered(); + po::parsed_options parsed = parser.run(); + po::store(parsed, options); +} + +void DisksApp::addOptions() +{ + options_description.add_options()("help,h", "Print common help message")("config-file,C", po::value(), "Set config file")( + "disk", po::value(), "Set disk name")("save-logs", "Save logs to a file")( + "log-level", po::value(), "Logging level")("query,q", po::value(), "Query for a non-interactive mode")( + "test-mode", "Interactive interface in test regyme"); command_descriptions.emplace("list-disks", makeCommandListDisks()); + command_descriptions.emplace("copy", makeCommandCopy()); command_descriptions.emplace("list", makeCommandList()); + command_descriptions.emplace("cd", makeCommandChangeDirectory()); command_descriptions.emplace("move", makeCommandMove()); command_descriptions.emplace("remove", makeCommandRemove()); command_descriptions.emplace("link", makeCommandLink()); - command_descriptions.emplace("copy", makeCommandCopy()); command_descriptions.emplace("write", makeCommandWrite()); command_descriptions.emplace("read", makeCommandRead()); command_descriptions.emplace("mkdir", makeCommandMkDir()); + command_descriptions.emplace("switch-disk", makeCommandSwitchDisk()); + command_descriptions.emplace("current_disk_with_path", makeCommandGetCurrentDiskAndPath()); + command_descriptions.emplace("touch", makeCommandTouch()); + command_descriptions.emplace("help", makeCommandHelp(*this)); #ifdef CLICKHOUSE_CLOUD command_descriptions.emplace("packed-io", makeCommandPackedIO()); #endif + for (const auto & [command_name, command_ptr] : command_descriptions) + { + if (command_name != command_ptr->command_name) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Command name inside map doesn't coincide with actual command name"); + } + } } void DisksApp::processOptions() @@ -93,76 +319,122 @@ void DisksApp::processOptions() config().setBool("save-logs", true); if (options.count("log-level")) config().setString("log-level", options["log-level"].as()); + if (options.count("test-mode")) + config().setBool("test-mode", true); + if (options.count("query")) + query = std::optional{options["query"].as()}; } -DisksApp::~DisksApp() + +void DisksApp::printEntryHelpMessage() const { - if (global_context) - global_context->shutdown(); + std::cout << "\x1b[1;33m ClickHouse disk management tool \x1b[0m \n"; + std::cout << options_description << '\n'; } -void DisksApp::init(std::vector & common_arguments) + +void DisksApp::printAvailableCommandsHelpMessage() const { - stopOptionsProcessing(); + std::cout << "\x1b[1;32mAvailable commands:\x1b[0m\n"; + std::vector> commands_with_aliases_and_descrtiptions{}; + size_t maximal_command_length = 0; + for (const auto & [command_name, command_ptr] : command_descriptions) + { + std::string command_string = getCommandLineWithAliases(command_ptr); + maximal_command_length = std::max(maximal_command_length, command_string.size()); + commands_with_aliases_and_descrtiptions.push_back({std::move(command_string), command_descriptions.at(command_name)}); + } + for (const auto & [command_with_aliases, command_ptr] : commands_with_aliases_and_descrtiptions) + { + std::cout << "\x1b[1;33m" << command_with_aliases << "\x1b[0m" << std::string(5, ' ') << "\x1b[1;33m" << command_ptr->description + << "\x1b[0m \n"; + std::cout << command_ptr->options_description; + std::cout << std::endl; + } +} - ProgramOptionsDescription options_description{createOptionsDescription("clickhouse-disks", getTerminalWidth())}; +void DisksApp::printCommandHelpMessage(CommandPtr command) const +{ + String command_name_with_aliases = getCommandLineWithAliases(command); + std::cout << "\x1b[1;32m" << command_name_with_aliases << "\x1b[0m" << std::string(2, ' ') << command->description << "\n"; + std::cout << command->options_description; +} - po::positional_options_description positional_options_description; +void DisksApp::printCommandHelpMessage(String command_name) const +{ + printCommandHelpMessage(getCommandByName(command_name)); +} - addOptions(options_description, positional_options_description); +String DisksApp::getCommandLineWithAliases(CommandPtr command) const +{ + String command_string = command->command_name; + bool need_comma = false; + for (const auto & [alias_name, alias_command_name] : aliases) + { + if (alias_command_name == command->command_name) + { + if (std::exchange(need_comma, true)) + command_string += ","; + else + command_string += "("; + command_string += alias_name; + } + } + command_string += (need_comma ? ")" : ""); + return command_string; +} - size_t command_pos = findCommandPos(common_arguments); - std::vector global_flags(command_pos); - command_arguments.resize(common_arguments.size() - command_pos); - copy(common_arguments.begin(), common_arguments.begin() + command_pos, global_flags.begin()); - copy(common_arguments.begin() + command_pos, common_arguments.end(), command_arguments.begin()); +void DisksApp::initializeHistoryFile() +{ + String home_path; + const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) + if (home_path_cstr) + home_path = home_path_cstr; + if (config().has("history-file")) + history_file = config().getString("history-file"); + else + history_file = home_path + "/.disks-file-history"; - parseAndCheckOptions(options_description, positional_options_description, global_flags); + if (!history_file.empty() && !fs::exists(history_file)) + { + try + { + FS::createFile(history_file); + } + catch (const ErrnoException & e) + { + if (e.getErrno() != EEXIST) + throw; + } + } +} + +void DisksApp::init(const std::vector & common_arguments) +{ + addOptions(); + parseAndCheckOptions(common_arguments, options_description, options); po::notify(options); if (options.count("help")) { - printHelpMessage(options_description); + printEntryHelpMessage(); + printAvailableCommandsHelpMessage(); exit(0); // NOLINT(concurrency-mt-unsafe) } - if (!supported_commands.contains(command_name)) - { - std::cerr << "Unknown command name: " << command_name << "\n"; - printHelpMessage(options_description); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - processOptions(); } -void DisksApp::parseAndCheckOptions( - ProgramOptionsDescription & options_description_, - boost::program_options::positional_options_description & positional_options_description, - std::vector & arguments) +String DisksApp::getDefaultConfigFileName() { - auto parser = po::command_line_parser(arguments) - .options(options_description_) - .positional(positional_options_description) - .allow_unregistered(); - - po::parsed_options parsed = parser.run(); - po::store(parsed, options); - - auto positional_arguments = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - for (const auto & arg : positional_arguments) - { - if (command_descriptions.contains(arg)) - { - command_name = arg; - break; - } - } + return "/etc/clickhouse-server/config.xml"; } int DisksApp::main(const std::vector & /*args*/) { + std::vector keys; + config().keys(keys); if (config().has("config-file") || fs::exists(getDefaultConfigFileName())) { String config_path = config().getString("config-file", getDefaultConfigFileName()); @@ -173,9 +445,13 @@ int DisksApp::main(const std::vector & /*args*/) } else { + printEntryHelpMessage(); throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specified"); } + config().keys(keys); + initializeHistoryFile(); + if (config().has("save-logs")) { auto log_level = config().getString("log-level", "trace"); @@ -200,61 +476,68 @@ int DisksApp::main(const std::vector & /*args*/) global_context->setApplicationType(Context::ApplicationType::DISKS); String path = config().getString("path", DBMS_DEFAULT_PATH); + global_context->setPath(path); - auto & command = command_descriptions[command_name]; + String main_disk = config().getString("disk", "default"); - auto command_options = command->getCommandOptions(); - std::vector args; - if (command_options) + auto validator = [](const Poco::Util::AbstractConfiguration &, const std::string &, const std::string &) { return true; }; + + constexpr auto config_prefix = "storage_configuration.disks"; + auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}); + disk_selector->initialize(config(), config_prefix, global_context, validator); + + std::vector>> disks_with_path; + + for (const auto & [_, disk_ptr] : disk_selector->getDisksMap()) { - auto parser = po::command_line_parser(command_arguments).options(*command_options).allow_unregistered(); - po::parsed_options parsed = parser.run(); - po::store(parsed, options); - po::notify(options); + disks_with_path.emplace_back( + disk_ptr, (disk_ptr->getName() == "local") ? std::optional{fs::current_path().string()} : std::nullopt); + } - args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - command->processOptions(config(), options); + + client = std::make_unique(std::move(disks_with_path), main_disk); + + suggest.setCompletionsCallback([&](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); }); + + if (!query.has_value()) + { + runInteractive(); } else { - auto parser = po::command_line_parser(command_arguments).options({}).allow_unregistered(); - po::parsed_options parsed = parser.run(); - args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); + processQueryText(query.value()); } - std::unordered_set disks - { - config().getString("disk", "default"), - config().getString("disk-from", config().getString("disk", "default")), - config().getString("disk-to", config().getString("disk", "default")), - }; - - auto validator = [&disks]( - const Poco::Util::AbstractConfiguration & config, - const std::string & disk_config_prefix, - const std::string & disk_name) - { - if (!disks.contains(disk_name)) - return false; - - const auto disk_type = config.getString(disk_config_prefix + ".type", "local"); - - if (disk_type == "cache") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk type 'cache' of disk {} is not supported by clickhouse-disks", disk_name); - - return true; - }; - - constexpr auto config_prefix = "storage_configuration.disks"; - auto disk_selector = std::make_shared(); - disk_selector->initialize(config(), config_prefix, global_context, validator); - - command->execute(args, disk_selector, config()); - return Application::EXIT_OK; } +DisksApp::~DisksApp() +{ + client.reset(nullptr); + if (global_context) + global_context->shutdown(); +} + +void DisksApp::runInteractiveTestMode() +{ + for (String input; std::getline(std::cin, input);) + { + if (!processQueryText(input)) + break; + + std::cout << "\a\a\a\a" << std::endl; + std::cerr << std::flush; + } +} + +void DisksApp::runInteractive() +{ + if (config().hasOption("test-mode")) + runInteractiveTestMode(); + else + runInteractiveReplxx(); +} } int mainEntryClickHouseDisks(int argc, char ** argv) @@ -269,16 +552,16 @@ int mainEntryClickHouseDisks(int argc, char ** argv) catch (const DB::Exception & e) { std::cerr << DB::getExceptionMessage(e, false) << std::endl; - return 1; + return 0; } catch (const boost::program_options::error & e) { std::cerr << "Bad arguments: " << e.what() << std::endl; - return DB::ErrorCodes::BAD_ARGUMENTS; + return 0; } catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; - return 1; + return 0; } } diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index 51bc3f58dc4..f8167884c62 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -1,61 +1,107 @@ #pragma once +#include +#include +#include #include +#include "DisksClient.h" +#include "ICommand_fwd.h" #include +#include +#include #include -#include - namespace DB { -class ICommand; -using CommandPtr = std::unique_ptr; - -namespace po = boost::program_options; using ProgramOptionsDescription = boost::program_options::options_description; using CommandLineOptions = boost::program_options::variables_map; -class DisksApp : public Poco::Util::Application, public Loggers +class DisksApp : public Poco::Util::Application { public: - DisksApp() = default; - ~DisksApp() override; + void addOptions(); - void init(std::vector & common_arguments); - - int main(const std::vector & args) override; - -protected: - static String getDefaultConfigFileName(); - - void addOptions( - ProgramOptionsDescription & options_description, - boost::program_options::positional_options_description & positional_options_description); void processOptions(); - void printHelpMessage(ProgramOptionsDescription & command_option_description); + bool processQueryText(const String & text); - size_t findCommandPos(std::vector & common_arguments); + void init(const std::vector & common_arguments); + + int main(const std::vector & /*args*/) override; + + CommandPtr getCommandByName(const String & command) const; + + void initializeHistoryFile(); + + static void parseAndCheckOptions( + const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options); + + void printEntryHelpMessage() const; + void printAvailableCommandsHelpMessage() const; + void printCommandHelpMessage(String command_name) const; + void printCommandHelpMessage(CommandPtr command) const; + String getCommandLineWithAliases(CommandPtr command) const; + + + std::vector getCompletions(const String & prefix) const; + + std::vector getEmptyCompletion(String command_name) const; + + ~DisksApp() override; private: - void parseAndCheckOptions( - ProgramOptionsDescription & options_description, - boost::program_options::positional_options_description & positional_options_description, - std::vector & arguments); + void runInteractive(); + void runInteractiveReplxx(); + void runInteractiveTestMode(); + + String getDefaultConfigFileName(); + + std::vector getCommandsToComplete(const String & command_prefix) const; + + // Fields responsible for the REPL work + String history_file; + LineReader::Suggest suggest; + static LineReader::Patterns query_extenders; + static LineReader::Patterns query_delimiters; + static String word_break_characters; + + // General command line arguments parsing fields -protected: - ContextMutablePtr global_context; SharedContextHolder shared_context; - - String command_name; - std::vector command_arguments; - - std::unordered_set supported_commands; + ContextMutablePtr global_context; + ProgramOptionsDescription options_description; + CommandLineOptions options; std::unordered_map command_descriptions; - po::variables_map options; -}; + std::optional query; + const std::unordered_map aliases + = {{"cp", "copy"}, + {"mv", "move"}, + {"ls", "list"}, + {"list_disks", "list-disks"}, + {"ln", "link"}, + {"rm", "remove"}, + {"cat", "read"}, + {"r", "read"}, + {"w", "write"}, + {"create", "touch"}, + {"delete", "remove"}, + {"ls-disks", "list-disks"}, + {"ls_disks", "list-disks"}, + {"packed_io", "packed-io"}, + {"change-dir", "cd"}, + {"change_dir", "cd"}, + {"switch_disk", "switch-disk"}, + {"current", "current_disk_with_path"}, + {"current_disk", "current_disk_with_path"}, + {"current_path", "current_disk_with_path"}, + {"cur", "current_disk_with_path"}}; + + std::set multidisk_commands = {"copy", "packed-io", "switch-disk", "cd"}; + + std::unique_ptr client{}; +}; } diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp new file mode 100644 index 00000000000..7e36c7911ab --- /dev/null +++ b/programs/disks/DisksClient.cpp @@ -0,0 +1,263 @@ +#include "DisksClient.h" +#include +#include +#include +#include + +#include + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +}; + +namespace DB +{ +DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) : disk(disk_) +{ + if (path_.has_value()) + { + if (!fs::path{path_.value()}.is_absolute()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); + } + path = path_.value(); + } + else + { + path = String{"/"}; + } + + String relative_path = normalizePathAndGetAsRelative(path); + if (disk->isDirectory(relative_path) || (relative_path.empty() && (disk->isDirectory("/")))) + { + return; + } + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Initializing path {} (normalized path: {}) at disk {} is not a directory", + path, + relative_path, + disk->getName()); +} + +std::vector DiskWithPath::listAllFilesByPath(const String & any_path) const +{ + if (isDirectory(any_path)) + { + std::vector file_names; + disk->listFiles(getRelativeFromRoot(any_path), file_names); + return file_names; + } + else + { + return {}; + } +} + +std::vector DiskWithPath::getAllFilesByPattern(const String & pattern) const +{ + auto [path_before, path_after] = [&]() -> std::pair + { + auto slash_pos = pattern.find_last_of('/'); + if (slash_pos >= pattern.size()) + { + return {"", pattern}; + } + else + { + return {pattern.substr(0, slash_pos + 1), pattern.substr(slash_pos + 1, pattern.size() - slash_pos - 1)}; + } + }(); + + if (!isDirectory(path_before)) + { + return {}; + } + else + { + std::vector file_names = listAllFilesByPath(path_before); + + std::vector answer; + + for (const auto & file_name : file_names) + { + if (file_name.starts_with(path_after)) + { + String file_pattern = path_before + file_name; + if (isDirectory(file_pattern)) + { + file_pattern = file_pattern + "/"; + } + answer.push_back(file_pattern); + } + } + return answer; + } +}; + +void DiskWithPath::setPath(const String & any_path) +{ + if (isDirectory(any_path)) + { + path = getAbsolutePath(any_path); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} at disk {} is not a directory", any_path, disk->getName()); + } +} + +String DiskWithPath::validatePathAndGetAsRelative(const String & path) +{ + String lexically_normal_path = fs::path(path).lexically_normal(); + if (lexically_normal_path.find("..") != std::string::npos) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); + + /// If path is absolute we should keep it as relative inside disk, so disk will look like + /// an ordinary filesystem with root. + if (fs::path(lexically_normal_path).is_absolute()) + return lexically_normal_path.substr(1); + + return lexically_normal_path; +} + +String DiskWithPath::normalizePathAndGetAsRelative(const String & messyPath) +{ + std::filesystem::path path(messyPath); + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + String npath = canonical_path.make_preferred().string(); + return validatePathAndGetAsRelative(npath); +} + +String DiskWithPath::normalizePath(const String & path) +{ + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + return canonical_path.make_preferred().string(); +} + +DisksClient::DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk) +{ + if (disks_with_paths.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing array of disks is empty"); + } + if (!begin_disk.has_value()) + { + begin_disk = disks_with_paths[0].first->getName(); + } + bool has_begin_disk = false; + for (auto & [disk, path] : disks_with_paths) + { + addDisk(disk, path); + if (disk->getName() == begin_disk.value()) + { + has_begin_disk = true; + } + } + if (!has_begin_disk) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no begin_disk '{}' in initializing array", begin_disk.value()); + } + current_disk = std::move(begin_disk.value()); +} + +const DiskWithPath & DisksClient::getDiskWithPath(const String & disk) const +{ + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } +} + +DiskWithPath & DisksClient::getDiskWithPath(const String & disk) +{ + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } +} + +const DiskWithPath & DisksClient::getCurrentDiskWithPath() const +{ + try + { + return disks.at(current_disk); + } + catch (...) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } +} + +DiskWithPath & DisksClient::getCurrentDiskWithPath() +{ + try + { + return disks.at(current_disk); + } + catch (...) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } +} + +void DisksClient::switchToDisk(const String & disk_, const std::optional & path_) +{ + if (disks.contains(disk_)) + { + if (path_.has_value()) + { + disks.at(disk_).setPath(path_.value()); + } + current_disk = disk_; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk_); + } +} + +std::vector DisksClient::getAllDiskNames() const +{ + std::vector answer{}; + answer.reserve(disks.size()); + for (const auto & [disk_name, _] : disks) + { + answer.push_back(disk_name); + } + return answer; +} + +std::vector DisksClient::getAllFilesByPatternFromAllDisks(const String & pattern) const +{ + std::vector answer{}; + for (const auto & [_, disk] : disks) + { + for (auto & word : disk.getAllFilesByPattern(pattern)) + { + answer.push_back(word); + } + } + return answer; +} + +void DisksClient::addDisk(DiskPtr disk_, const std::optional & path_) +{ + String disk_name = disk_->getName(); + if (disks.contains(disk_->getName())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' already exists", disk_name); + } + disks.emplace(disk_name, DiskWithPath{disk_, path_}); +} +} diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h new file mode 100644 index 00000000000..8a55d22af93 --- /dev/null +++ b/programs/disks/DisksClient.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "Disks/IDisk.h" + +#include +#include +#include + +namespace fs = std::filesystem; + +namespace DB +{ + +std::vector split(const String & text, const String & delimiters); + +using ProgramOptionsDescription = boost::program_options::options_description; +using CommandLineOptions = boost::program_options::variables_map; + +class DiskWithPath +{ +public: + explicit DiskWithPath(DiskPtr disk_, std::optional path_ = std::nullopt); + + String getAbsolutePath(const String & any_path) const { return normalizePath(fs::path(path) / any_path); } + + String getCurrentPath() const { return path; } + + bool isDirectory(const String & any_path) const + { + return disk->isDirectory(getRelativeFromRoot(any_path)) || (getRelativeFromRoot(any_path).empty() && (disk->isDirectory("/"))); + } + + std::vector listAllFilesByPath(const String & any_path) const; + + std::vector getAllFilesByPattern(const String & pattern) const; + + DiskPtr getDisk() const { return disk; } + + void setPath(const String & any_path); + + String getRelativeFromRoot(const String & any_path) const { return normalizePathAndGetAsRelative(getAbsolutePath(any_path)); } + +private: + static String validatePathAndGetAsRelative(const String & path); + static std::string normalizePathAndGetAsRelative(const std::string & messyPath); + static std::string normalizePath(const std::string & messyPath); + + const DiskPtr disk; + String path; +}; + +class DisksClient +{ +public: + explicit DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk); + + const DiskWithPath & getDiskWithPath(const String & disk) const; + + DiskWithPath & getDiskWithPath(const String & disk); + + const DiskWithPath & getCurrentDiskWithPath() const; + + DiskWithPath & getCurrentDiskWithPath(); + + DiskPtr getCurrentDisk() const { return getCurrentDiskWithPath().getDisk(); } + + DiskPtr getDisk(const String & disk) const { return getDiskWithPath(disk).getDisk(); } + + void switchToDisk(const String & disk_, const std::optional & path_); + + std::vector getAllDiskNames() const; + + std::vector getAllFilesByPatternFromAllDisks(const String & pattern) const; + + +private: + void addDisk(DiskPtr disk_, const std::optional & path_); + + String current_disk; + std::unordered_map disks; +}; +} diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 86188fb6db1..f622bcad3c6 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -1,5 +1,5 @@ #include "ICommand.h" -#include +#include "DisksClient.h" namespace DB @@ -10,43 +10,42 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -void ICommand::printHelpMessage() const +CommandLineOptions ICommand::processCommandLineArguments(const Strings & commands) { - std::cout << "Command: " << command_name << '\n'; - std::cout << "Description: " << description << '\n'; - std::cout << "Usage: " << usage << '\n'; + CommandLineOptions options; + auto parser = po::command_line_parser(commands); + parser.options(options_description).positional(positional_options_description); - if (command_option_description) + po::parsed_options parsed = parser.run(); + po::store(parsed, options); + + return options; +} + +void ICommand::execute(const Strings & commands, DisksClient & client) +{ + try { - auto options = *command_option_description; - if (!options.options().empty()) - std::cout << options << '\n'; + processCommandLineArguments(commands); + } + catch (std::exception & exc) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}", exc.what()); + } + executeImpl(processCommandLineArguments(commands), client); +} + +DiskWithPath & ICommand::getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name) +{ + auto disk_name = getValueFromCommandLineOptionsWithOptional(options, name); + if (disk_name.has_value()) + { + return client.getDiskWithPath(disk_name.value()); + } + else + { + return client.getCurrentDiskWithPath(); } } -void ICommand::addOptions(ProgramOptionsDescription & options_description) -{ - if (!command_option_description || command_option_description->options().empty()) - return; - - options_description.add(*command_option_description); -} - -String ICommand::validatePathAndGetAsRelative(const String & path) -{ - /// If path contain non-normalized symbols like . we will normalized them. If the resulting normalized path - /// still contain '..' it can be dangerous, disallow such paths. Also since clickhouse-disks - /// is not an interactive program (don't track you current path) it's OK to disallow .. paths. - String lexically_normal_path = fs::path(path).lexically_normal(); - if (lexically_normal_path.find("..") != std::string::npos) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); - - /// If path is absolute we should keep it as relative inside disk, so disk will look like - /// an ordinary filesystem with root. - if (fs::path(lexically_normal_path).is_absolute()) - return lexically_normal_path.substr(1); - - return lexically_normal_path; -} - } diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index efe350fe87b..6faf90e2b52 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -1,66 +1,146 @@ #pragma once -#include +#include #include +#include +#include #include -#include #include +#include "Common/Exception.h" +#include -#include +#include + +#include "DisksApp.h" + +#include "DisksClient.h" + +#include "ICommand_fwd.h" namespace DB { namespace po = boost::program_options; -using ProgramOptionsDescription = boost::program_options::options_description; -using CommandLineOptions = boost::program_options::variables_map; +using ProgramOptionsDescription = po::options_description; +using PositionalProgramOptionsDescription = po::positional_options_description; +using CommandLineOptions = po::variables_map; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} class ICommand { public: - ICommand() = default; + explicit ICommand() = default; virtual ~ICommand() = default; - virtual void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) = 0; + void execute(const Strings & commands, DisksClient & client); - const std::optional & getCommandOptions() const { return command_option_description; } + virtual void executeImpl(const CommandLineOptions & options, DisksClient & client) = 0; - void addOptions(ProgramOptionsDescription & options_description); - - virtual void processOptions(Poco::Util::LayeredConfiguration & config, po::variables_map & options) const = 0; + CommandLineOptions processCommandLineArguments(const Strings & commands); protected: - void printHelpMessage() const; + template + static T getValueFromCommandLineOptions(const CommandLineOptions & options, const String & name) + { + try + { + return options[name].as(); + } + catch (boost::bad_any_cast &) + { + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Argument '{}' has wrong type and can't be parsed", name); + } + } + + template + static T getValueFromCommandLineOptionsThrow(const CommandLineOptions & options, const String & name) + { + if (options.count(name)) + { + return getValueFromCommandLineOptions(options, name); + } + else + { + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Mandatory argument '{}' is missing", name); + } + } + + template + static T getValueFromCommandLineOptionsWithDefault(const CommandLineOptions & options, const String & name, const T & default_value) + { + if (options.count(name)) + { + return getValueFromCommandLineOptions(options, name); + } + else + { + return default_value; + } + } + + template + static std::optional getValueFromCommandLineOptionsWithOptional(const CommandLineOptions & options, const String & name) + { + if (options.count(name)) + { + return std::optional{getValueFromCommandLineOptions(options, name)}; + } + else + { + return std::nullopt; + } + } + + DiskWithPath & getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name); + + String getTargetLocation(const String & path_from, DiskWithPath & disk_to, const String & path_to) + { + if (!disk_to.getDisk()->isDirectory(path_to)) + { + return path_to; + } + String copied_path_from = path_from; + if (copied_path_from.ends_with('/')) + { + copied_path_from.pop_back(); + } + String plain_filename = fs::path(copied_path_from).filename(); + + return fs::path{path_to} / plain_filename; + } - static String validatePathAndGetAsRelative(const String & path); public: String command_name; String description; + ProgramOptionsDescription options_description; protected: - std::optional command_option_description; - String usage; - po::positional_options_description positional_options_description; + PositionalProgramOptionsDescription positional_options_description; }; -using CommandPtr = std::unique_ptr; - -} - DB::CommandPtr makeCommandCopy(); -DB::CommandPtr makeCommandLink(); -DB::CommandPtr makeCommandList(); DB::CommandPtr makeCommandListDisks(); +DB::CommandPtr makeCommandList(); +DB::CommandPtr makeCommandChangeDirectory(); +DB::CommandPtr makeCommandLink(); DB::CommandPtr makeCommandMove(); DB::CommandPtr makeCommandRead(); DB::CommandPtr makeCommandRemove(); DB::CommandPtr makeCommandWrite(); DB::CommandPtr makeCommandMkDir(); +DB::CommandPtr makeCommandSwitchDisk(); +DB::CommandPtr makeCommandGetCurrentDiskAndPath(); +DB::CommandPtr makeCommandHelp(const DisksApp & disks_app); +DB::CommandPtr makeCommandTouch(); +#ifdef CLICKHOUSE_CLOUD DB::CommandPtr makeCommandPackedIO(); +#endif +} diff --git a/programs/disks/ICommand_fwd.h b/programs/disks/ICommand_fwd.h new file mode 100644 index 00000000000..84310b4a18d --- /dev/null +++ b/programs/disks/ICommand_fwd.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ +class ICommand; + +using CommandPtr = std::shared_ptr; +} diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 967920557e1..44c2daa33ad 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -264,6 +264,35 @@ HTTPContextPtr httpContext() return std::make_shared(Context::getGlobalContextInstance()); } +String getKeeperPath(Poco::Util::LayeredConfiguration & config) +{ + String path; + if (config.has("keeper_server.storage_path")) + { + path = config.getString("keeper_server.storage_path"); + } + else if (config.has("keeper_server.log_storage_path")) + { + path = std::filesystem::path(config.getString("keeper_server.log_storage_path")).parent_path(); + } + else if (config.has("keeper_server.snapshot_storage_path")) + { + path = std::filesystem::path(config.getString("keeper_server.snapshot_storage_path")).parent_path(); + } + else if (std::filesystem::is_directory(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination")) + { + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly", + KEEPER_DEFAULT_PATH, String{std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); + } + else + { + path = KEEPER_DEFAULT_PATH; + } + return path; +} + + } int Keeper::main(const std::vector & /*args*/) @@ -316,31 +345,7 @@ try updateMemorySoftLimitInConfig(config()); - std::string path; - - if (config().has("keeper_server.storage_path")) - { - path = config().getString("keeper_server.storage_path"); - } - else if (config().has("keeper_server.log_storage_path")) - { - path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path(); - } - else if (config().has("keeper_server.snapshot_storage_path")) - { - path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path(); - } - else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination")) - { - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly", - KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); - } - else - { - path = KEEPER_DEFAULT_PATH; - } - + std::string path = getKeeperPath(config()); std::filesystem::create_directories(path); /// Check that the process user id matches the owner of the data. @@ -554,7 +559,7 @@ try auto main_config_reloader = std::make_unique( config_path, extra_paths, - config().getString("path", KEEPER_DEFAULT_PATH), + getKeeperPath(config()), std::move(unused_cache), unused_event, [&](ConfigurationPtr config, bool /* initial_loading */) diff --git a/programs/keeper/conf.d/local.yaml b/programs/keeper/conf.d/local.yaml new file mode 100644 index 00000000000..722e90e374a --- /dev/null +++ b/programs/keeper/conf.d/local.yaml @@ -0,0 +1,9 @@ +logger: + log: + "@remove": remove + errorlog: + "@remove": remove + console: 1 +keeper_server: + log_storage_path: ./logs + snapshot_storage_path: ./snapshots diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 066877b1cec..3b88bb36954 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -587,6 +587,54 @@ static void sanityChecks(Server & server) } } +void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, ContextMutablePtr context, Poco::Logger * log) +{ + try + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys("startup_scripts", keys); + + SetResultDetailsFunc callback; + for (const auto & key : keys) + { + std::string full_prefix = "startup_scripts." + key; + + if (config.has(full_prefix + ".condition")) + { + auto condition = config.getString(full_prefix + ".condition"); + auto condition_read_buffer = ReadBufferFromString(condition); + auto condition_write_buffer = WriteBufferFromOwnString(); + + LOG_DEBUG(log, "Checking startup query condition `{}`", condition); + executeQuery(condition_read_buffer, condition_write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); + + auto result = condition_write_buffer.str(); + + if (result != "1\n" && result != "true\n") + { + if (result != "0\n" && result != "false\n") + context->addWarningMessage(fmt::format("The condition query returned `{}`, which can't be interpreted as a boolean (`0`, `false`, `1`, `true`). Will skip this query.", result)); + + continue; + } + + LOG_DEBUG(log, "Condition is true, will execute the query next"); + } + + auto query = config.getString(full_prefix + ".query"); + auto read_buffer = ReadBufferFromString(query); + auto write_buffer = WriteBufferFromOwnString(); + + LOG_DEBUG(log, "Executing query `{}`", query); + executeQuery(read_buffer, write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); + } + } + catch (...) + { + tryLogCurrentException(log, "Failed to parse startup scripts file"); + } +} + static void initializeAzureSDKLogger( [[ maybe_unused ]] const ServerSettings & server_settings, [[ maybe_unused ]] int server_logs_level) @@ -626,6 +674,28 @@ static void initializeAzureSDKLogger( #endif } +#if defined(SANITIZER) +static std::vector getSanitizerNames() +{ + std::vector names; + +#if defined(ADDRESS_SANITIZER) + names.push_back("address"); +#endif +#if defined(THREAD_SANITIZER) + names.push_back("thread"); +#endif +#if defined(MEMORY_SANITIZER) + names.push_back("memory"); +#endif +#if defined(UNDEFINED_BEHAVIOR_SANITIZER) + names.push_back("undefined behavior"); +#endif + + return names; +} +#endif + int Server::main(const std::vector & /*args*/) try { @@ -716,7 +786,17 @@ try global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); #if defined(SANITIZER) - global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); + auto sanitizers = getSanitizerNames(); + + String log_message; + if (sanitizers.empty()) + log_message = "sanitizer"; + else if (sanitizers.size() == 1) + log_message = fmt::format("{} sanitizer", sanitizers.front()); + else + log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", ")); + + global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message)); #endif #if defined(SANITIZE_COVERAGE) || WITH_COVERAGE @@ -1958,6 +2038,11 @@ try /// otherwise there is a race condition between the system database initialization /// and creation of new tables in the database. waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks); + + /// Startup scripts can depend on the system log tables. + if (config().has("startup_scripts") && !server_settings.prepare_system_log_tables_on_startup.changed) + global_context->setServerSetting("prepare_system_log_tables_on_startup", true); + /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); @@ -2106,6 +2191,9 @@ try load_metadata_tasks.clear(); load_metadata_tasks.shrink_to_fit(); + if (config().has("startup_scripts")) + loadStartupScripts(config(), global_context, log); + { std::lock_guard lock(servers_lock); for (auto & server : servers) diff --git a/pyproject.toml b/pyproject.toml index 279d077a695..90f089afa41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,10 +35,9 @@ disable = ''' broad-except, bare-except, no-else-return, - global-statement + global-statement, ''' [tool.pylint.SIMILARITIES] # due to SQL min-similarity-lines=1000 - diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index ac221bd66e7..698602ca5bc 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -10,6 +10,7 @@ #include #include +#include namespace DB { @@ -26,12 +27,103 @@ static constexpr std::array boolean_functions{ "like"sv, "notLike"sv, "ilike"sv, "notILike"sv, "empty"sv, "notEmpty"sv, "not"sv, "and"sv, "or"sv}; -static bool isBooleanFunction(const String & func_name) + +bool isBooleanFunction(const String & func_name) { return std::any_of( boolean_functions.begin(), boolean_functions.end(), [&](const auto boolean_func) { return func_name == boolean_func; }); } +bool isNodeFunction(const QueryTreeNodePtr & node, const String & func_name) +{ + if (const auto * function_node = node->as()) + return function_node->getFunctionName() == func_name; + return false; +} + +QueryTreeNodePtr getFunctionArgument(const QueryTreeNodePtr & node, size_t idx) +{ + if (const auto * function_node = node->as()) + { + const auto & args = function_node->getArguments().getNodes(); + if (idx < args.size()) + return args[idx]; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected '{}' to be a function with at least {} arguments", node->formatASTForErrorMessage(), idx + 1); +} + +QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes) +{ + for (const auto & node : nodes) + { + const auto * function_node = node->as(); + if (function_node && function_node->getFunctionName() == "equals" && + function_node->getArguments().getNodes().size() == 2) + { + return node; + } + } + return nullptr; +} + +bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value) +{ + const auto * constant_node = node->as(); + if (!constant_node || !constant_node->getResultType()->equals(DataTypeUInt8())) + return false; + + UInt64 constant_value; + return (constant_node->getValue().tryGet(constant_value) && constant_value == expected_value); +} + +/// Returns true if expression consists of only conjunctions of functions with the specified name or true constants +bool isOnlyConjunctionOfFunctions( + const QueryTreeNodePtr & node, + const String & func_name, + const QueryTreeNodePtrWithHashSet & allowed_arguments) +{ + if (isBooleanConstant(node, true)) + return true; + + const auto * node_function = node->as(); + if (!node_function) + return false; + + if (node_function->getFunctionName() == func_name + && allowed_arguments.contains(node_function->getArgumentsNode())) + return true; + + if (node_function->getFunctionName() == "and") + { + for (const auto & and_argument : node_function->getArguments().getNodes()) + { + if (!isOnlyConjunctionOfFunctions(and_argument, func_name, allowed_arguments)) + return false; + } + return true; + } + return false; +} + +/// We can rewrite to a <=> b only if we are joining on a and b, +/// because the function is not yet implemented for other cases. +bool isTwoArgumentsFromDifferentSides(const FunctionNode & node_function, const JoinNode & join_node) +{ + const auto & argument_nodes = node_function.getArguments().getNodes(); + if (argument_nodes.size() != 2) + return false; + + auto first_src = getExpressionSource(argument_nodes[0]); + auto second_src = getExpressionSource(argument_nodes[1]); + if (!first_src || !second_src) + return false; + + const auto & lhs_join = *join_node.getLeftTableExpression(); + const auto & rhs_join = *join_node.getRightTableExpression(); + return (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) || + (first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join)); +} + /// Visitor that optimizes logical expressions _only_ in JOIN ON section class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext { @@ -47,15 +139,16 @@ public: { auto * function_node = node->as(); - if (!function_node) - return; + QueryTreeNodePtr new_node = nullptr; + if (function_node && function_node->getFunctionName() == "or") + new_node = tryOptimizeJoinOnNulls(function_node->getArguments().getNodes(), getContext()); + else + new_node = tryOptimizeJoinOnNulls({node}, getContext()); - if (function_node->getFunctionName() == "or") + if (new_node) { - bool is_argument_type_changed = tryOptimizeIsNotDistinctOrIsNull(node, getContext()); - if (is_argument_type_changed) - need_rerun_resolve = true; - return; + need_rerun_resolve |= !new_node->getResultType()->equals(*node->getResultType()); + node = new_node; } } @@ -72,15 +165,11 @@ private: const JoinNode * join_node; bool need_rerun_resolve = false; - /// Returns true if type of some operand is changed and parent function needs to be re-resolved - bool tryOptimizeIsNotDistinctOrIsNull(QueryTreeNodePtr & node, const ContextPtr & context) + /// Returns optimized node or nullptr if nothing have been changed + QueryTreeNodePtr tryOptimizeJoinOnNulls(const QueryTreeNodes & nodes, const ContextPtr & context) { - auto & function_node = node->as(); - chassert(function_node.getFunctionName() == "or"); - - QueryTreeNodes or_operands; - or_operands.reserve(function_node.getArguments().getNodes().size()); + or_operands.reserve(nodes.size()); /// Indices of `equals` or `isNotDistinctFrom` functions in the vector above std::vector equals_functions_indices; @@ -93,47 +182,73 @@ private: * b => [(a IS NULL AND b IS NULL)] * c => [(a IS NULL AND c IS NULL)] * } - * Then for each a <=> b we can find all operands that contains both a IS NULL and b IS NULL + * Then for each equality a = b we can check if we have operand (a IS NULL AND b IS NULL) */ QueryTreeNodePtrWithHashMap> is_null_argument_to_indices; - for (const auto & argument : function_node.getArguments()) - { - or_operands.push_back(argument); + bool is_anything_changed = false; - auto * argument_function = argument->as(); + for (const auto & node : nodes) + { + if (isBooleanConstant(node, false)) + { + /// Remove false constants from OR + is_anything_changed = true; + continue; + } + + or_operands.push_back(node); + auto * argument_function = node->as(); if (!argument_function) continue; const auto & func_name = argument_function->getFunctionName(); if (func_name == "equals" || func_name == "isNotDistinctFrom") { - const auto & argument_nodes = argument_function->getArguments().getNodes(); - if (argument_nodes.size() != 2) - continue; - /// We can rewrite to a <=> b only if we are joining on a and b, - /// because the function is not yet implemented for other cases. - auto first_src = getExpressionSource(argument_nodes[0]); - auto second_src = getExpressionSource(argument_nodes[1]); - if (!first_src || !second_src) - continue; - const auto & lhs_join = *join_node->getLeftTableExpression(); - const auto & rhs_join = *join_node->getRightTableExpression(); - bool arguments_from_both_sides = (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) || - (first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join)); - if (!arguments_from_both_sides) - continue; - equals_functions_indices.push_back(or_operands.size() - 1); + if (isTwoArgumentsFromDifferentSides(*argument_function, *join_node)) + equals_functions_indices.push_back(or_operands.size() - 1); } else if (func_name == "and") { - for (const auto & and_argument : argument_function->getArguments().getNodes()) + const auto & and_arguments = argument_function->getArguments().getNodes(); + bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull"); + if (all_are_is_null) { - auto * and_argument_function = and_argument->as(); - if (and_argument_function && and_argument_function->getFunctionName() == "isNull") + is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1); + is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1); + } + + /// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b` + /// Even though this expression are not equivalent (first is NULL on NULLs, while second is FALSE), + /// it is still correct since for JOIN ON condition NULL is treated as FALSE + if (const auto & equals_function = findEqualsFunction(and_arguments)) + { + const auto & equals_arguments = equals_function->as()->getArguments().getNodes(); + /// Expected isNotNull arguments + QueryTreeNodePtrWithHashSet allowed_arguments; + allowed_arguments.insert(QueryTreeNodePtrWithHash(std::make_shared(QueryTreeNodes{equals_arguments[0]}))); + allowed_arguments.insert(QueryTreeNodePtrWithHash(std::make_shared(QueryTreeNodes{equals_arguments[1]}))); + + bool can_be_optimized = true; + for (const auto & and_argument : and_arguments) { - const auto & is_null_argument = and_argument_function->getArguments().getNodes()[0]; - is_null_argument_to_indices[is_null_argument].push_back(or_operands.size() - 1); + if (and_argument.get() == equals_function.get()) + continue; + + if (isOnlyConjunctionOfFunctions(and_argument, "isNotNull", allowed_arguments)) + continue; + + can_be_optimized = false; + break; + } + + if (can_be_optimized) + { + is_anything_changed = true; + or_operands.pop_back(); + or_operands.push_back(equals_function); + if (isTwoArgumentsFromDifferentSides(equals_function->as(), *join_node)) + equals_functions_indices.push_back(or_operands.size() - 1); } } } @@ -144,9 +259,9 @@ private: for (size_t equals_function_idx : equals_functions_indices) { - auto * equals_function = or_operands[equals_function_idx]->as(); + const auto * equals_function = or_operands[equals_function_idx]->as(); - /// For a <=> b we are looking for expressions containing both `a IS NULL` and `b IS NULL` combined with AND + /// For a = b we are looking for all expressions `a IS NULL AND b IS NULL` const auto & argument_nodes = equals_function->getArguments().getNodes(); const auto & lhs_is_null_parents = is_null_argument_to_indices[argument_nodes[0]]; const auto & rhs_is_null_parents = is_null_argument_to_indices[argument_nodes[1]]; @@ -161,60 +276,40 @@ private: for (size_t to_optimize_idx : operands_to_optimize) { - /// We are looking for operand `a IS NULL AND b IS NULL AND ...` - auto * operand_to_optimize = or_operands[to_optimize_idx]->as(); - - /// Remove `a IS NULL` and `b IS NULL` arguments from AND - QueryTreeNodes new_arguments; - for (const auto & and_argument : operand_to_optimize->getArguments().getNodes()) - { - bool to_eliminate = false; - - const auto * and_argument_function = and_argument->as(); - if (and_argument_function && and_argument_function->getFunctionName() == "isNull") - { - const auto & is_null_argument = and_argument_function->getArguments().getNodes()[0]; - to_eliminate = (is_null_argument->isEqual(*argument_nodes[0]) || is_null_argument->isEqual(*argument_nodes[1])); - } - - if (to_eliminate) - arguments_to_reresolve.insert(to_optimize_idx); - else - new_arguments.emplace_back(and_argument); - } - /// If less than two arguments left, we will remove or replace the whole AND below - operand_to_optimize->getArguments().getNodes() = std::move(new_arguments); + /// Remove `a IS NULL AND b IS NULL` + or_operands[to_optimize_idx] = nullptr; + is_anything_changed = true; } } - if (arguments_to_reresolve.empty()) + if (arguments_to_reresolve.empty() && !is_anything_changed) /// Nothing have been changed - return false; + return nullptr; auto and_function_resolver = FunctionFactory::instance().get("and", context); auto strict_equals_function_resolver = FunctionFactory::instance().get("isNotDistinctFrom", context); - bool need_reresolve = false; QueryTreeNodes new_or_operands; for (size_t i = 0; i < or_operands.size(); ++i) { if (arguments_to_reresolve.contains(i)) { - auto * function = or_operands[i]->as(); + const auto * function = or_operands[i]->as(); if (function->getFunctionName() == "equals") { /// We should replace `a = b` with `a <=> b` because we removed checks for IS NULL - need_reresolve |= function->getResultType()->isNullable(); - function->resolveAsFunction(strict_equals_function_resolver); - new_or_operands.emplace_back(std::move(or_operands[i])); + auto new_function = or_operands[i]->clone(); + new_function->as()->resolveAsFunction(strict_equals_function_resolver); + new_or_operands.emplace_back(std::move(new_function)); } else if (function->getFunctionName() == "and") { const auto & and_arguments = function->getArguments().getNodes(); if (and_arguments.size() > 1) { - function->resolveAsFunction(and_function_resolver); - new_or_operands.emplace_back(std::move(or_operands[i])); + auto new_function = or_operands[i]->clone(); + new_function->as()->resolveAsFunction(and_function_resolver); + new_or_operands.emplace_back(std::move(new_function)); } else if (and_arguments.size() == 1) { @@ -223,25 +318,26 @@ private: } } else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function name: '{}'", function->getFunctionName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function '{}'", function->getFunctionName()); } - else + else if (or_operands[i]) { new_or_operands.emplace_back(std::move(or_operands[i])); } } + if (new_or_operands.empty()) + return nullptr; + if (new_or_operands.size() == 1) - { - node = std::move(new_or_operands[0]); - return need_reresolve; - } + return new_or_operands[0]; /// Rebuild OR function auto or_function_resolver = FunctionFactory::instance().get("or", context); - function_node.getArguments().getNodes() = std::move(new_or_operands); - function_node.resolveAsFunction(or_function_resolver); - return need_reresolve; + auto function_node = std::make_shared("or"); + function_node->getArguments().getNodes() = std::move(new_or_operands); + function_node->resolveAsFunction(or_function_resolver); + return function_node; } }; diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index f52d724f346..e5ee8a0d0b2 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -33,7 +33,7 @@ public: return; auto * function_node = node->as(); - if (!function_node || !function_node->isAggregateFunction()) + if (!function_node || !function_node->isAggregateFunction() || !function_node->getResultType()->equals(DataTypeUInt64())) return; auto function_name = function_node->getFunctionName(); diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 6a5db4bc1de..dd083dd5df6 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -940,6 +940,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select table_join.locality, result_join_strictness, result_join_kind); + join_node->setOriginalAST(table_element.table_join); /** Original AST is not set because it will contain only join part and does * not include left table expression. diff --git a/src/Backups/BackupOperationInfo.h b/src/Backups/BackupOperationInfo.h index 21b5284458c..71589ec3b30 100644 --- a/src/Backups/BackupOperationInfo.h +++ b/src/Backups/BackupOperationInfo.h @@ -3,6 +3,8 @@ #include #include +#include + namespace DB { diff --git a/src/Client/ConnectionEstablisher.cpp b/src/Client/ConnectionEstablisher.cpp index 303105751ad..05839b44452 100644 --- a/src/Client/ConnectionEstablisher.cpp +++ b/src/Client/ConnectionEstablisher.cpp @@ -8,6 +8,7 @@ namespace ProfileEvents extern const Event DistributedConnectionUsable; extern const Event DistributedConnectionMissingTable; extern const Event DistributedConnectionStaleReplica; + extern const Event DistributedConnectionFailTry; } namespace DB @@ -97,6 +98,8 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std:: } catch (const Exception & e) { + ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); + if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT && e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF && e.code() != ErrorCodes::DNS_ERROR) throw; diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 51cbe6f3d6f..4effc3adafa 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -187,9 +187,9 @@ void HedgedConnections::sendQuery( modified_settings.group_by_two_level_threshold_bytes = 0; } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; + const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas(); - if (offset_states.size() > 1 && enable_sample_offset_parallel_processing) + if (offset_states.size() > 1 && enable_offset_parallel_processing) { modified_settings.parallel_replicas_count = offset_states.size(); modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; @@ -201,7 +201,8 @@ void HedgedConnections::sendQuery( /// all servers involved in the distributed query processing. modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); - replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); + replica.connection->sendQuery( + timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout); }; diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 0fa2bc12924..be7397b0fad 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -7,7 +7,6 @@ namespace ProfileEvents { extern const Event HedgedRequestsChangeReplica; - extern const Event DistributedConnectionFailTry; extern const Event DistributedConnectionFailAtAll; } @@ -327,7 +326,6 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnect { ShuffledPool & shuffled_pool = shuffled_pools[index]; LOG_INFO(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1); shuffled_pool.slowdown_count = 0; diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 99bdd706d8b..bcef286ecbc 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -23,8 +24,8 @@ namespace ErrorCodes } -MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) +MultiplexedConnections::MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()) { connection.setThrottler(throttler); @@ -36,9 +37,9 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se } -MultiplexedConnections::MultiplexedConnections(std::shared_ptr connection_ptr_, const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) - , connection_ptr(connection_ptr_) +MultiplexedConnections::MultiplexedConnections( + std::shared_ptr connection_ptr_, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()), connection_ptr(connection_ptr_) { connection_ptr->setThrottler(throttler); @@ -50,9 +51,8 @@ MultiplexedConnections::MultiplexedConnections(std::shared_ptr conne } MultiplexedConnections::MultiplexedConnections( - std::vector && connections, - const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) + std::vector && connections, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()) { /// If we didn't get any connections from pool and getMany() did not throw exceptions, this means that /// `skip_unavailable_shards` was set. Then just return. @@ -156,18 +156,18 @@ void MultiplexedConnections::sendQuery( /// all servers involved in the distributed query processing. modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; + const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas(); size_t num_replicas = replica_states.size(); if (num_replicas > 1) { - if (enable_sample_offset_parallel_processing) + if (enable_offset_parallel_processing) /// Use multiple replicas for parallel query processing. modified_settings.parallel_replicas_count = num_replicas; for (size_t i = 0; i < num_replicas; ++i) { - if (enable_sample_offset_parallel_processing) + if (enable_offset_parallel_processing) modified_settings.parallel_replica_offset = i; replica_states[i].connection->sendQuery( diff --git a/src/Client/MultiplexedConnections.h b/src/Client/MultiplexedConnections.h index 9f7b47e0562..dec32e52d4f 100644 --- a/src/Client/MultiplexedConnections.h +++ b/src/Client/MultiplexedConnections.h @@ -10,7 +10,6 @@ namespace DB { - /** To retrieve data directly from multiple replicas (connections) from one shard * within a single thread. As a degenerate case, it can also work with one connection. * It is assumed that all functions except sendCancel are always executed in one thread. @@ -21,14 +20,12 @@ class MultiplexedConnections final : public IConnections { public: /// Accepts ready connection. - MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler_); /// Accepts ready connection and keep it alive before drain - MultiplexedConnections(std::shared_ptr connection_, const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(std::shared_ptr connection_, ContextPtr context_, const ThrottlerPtr & throttler_); /// Accepts a vector of connections to replicas of one shard already taken from pool. - MultiplexedConnections( - std::vector && connections, - const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(std::vector && connections, ContextPtr context_, const ThrottlerPtr & throttler_); void sendScalarsData(Scalars & data) override; void sendExternalTablesData(std::vector & data) override; @@ -86,6 +83,7 @@ private: /// Mark the replica as invalid. void invalidateReplica(ReplicaState & replica_state); + ContextPtr context; const Settings & settings; /// The current number of valid connections to the replicas of this shard. diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index f7e6b1a1ccc..cfd07c27765 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -267,7 +267,11 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) +#else +void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length) +#endif { const ColumnAggregateFunction & from_concrete = assert_cast(from); @@ -462,7 +466,11 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size insertMergeFrom(from, n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n) +#else +void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n) +#endif { insertRangeFrom(from, n, 1); } diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index a75b27e835c..1be7a862438 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -145,7 +145,14 @@ public: void insertData(const char * pos, size_t length) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & from, size_t n) override; +#else + using IColumn::insertFrom; + + void doInsertFrom(const IColumn & from, size_t n) override; +#endif + void insertFrom(ConstAggregateDataPtr place); @@ -182,7 +189,11 @@ public: void protect() override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & from, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override; +#endif void popBack(size_t n) override; @@ -201,7 +212,11 @@ public: MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override +#endif { return 0; } diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 0b7e6541560..5d7350f3a79 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -337,7 +337,11 @@ bool ColumnArray::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnArray::insertFrom(const IColumn & src_, size_t n) +#else +void ColumnArray::doInsertFrom(const IColumn & src_, size_t n) +#endif { const ColumnArray & src = assert_cast(src_); size_t size = src.sizeAt(n); @@ -392,7 +396,11 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan : 1); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +#else +int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs_, nan_direction_hint); } @@ -535,7 +543,11 @@ void ColumnArray::getExtremes(Field & min, Field & max) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 53eb5166df8..6cd3e2f6c3b 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -84,10 +84,18 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; +#endif void insertDefault() override; void popBack(size_t n) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -95,7 +103,11 @@ public: ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override; void getPermutation(PermutationSortDirection direction, PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res) const override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 934adf07cf4..5e455709fec 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -85,7 +85,11 @@ public: bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } bool tryInsert(const Field &) override { throwMustBeDecompressed(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } +#else + void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } +#endif void insertData(const char *, size_t) override { throwMustBeDecompressed(); } void insertDefault() override { throwMustBeDecompressed(); } void popBack(size_t) override { throwMustBeDecompressed(); } @@ -100,7 +104,11 @@ public: void expand(const Filter &, bool) override { throwMustBeDecompressed(); } ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } +#endif void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { throwMustBeDecompressed(); diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index c2c0fa3027c..b55a1f42037 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -32,6 +32,8 @@ private: ColumnConst(const ColumnConst & src) = default; public: + bool isConst() const override { return true; } + ColumnPtr convertToFullColumn() const; ColumnPtr convertToFullColumnIfConst() const override @@ -121,7 +123,11 @@ public: return data->isNullAt(0); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override +#else + void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override +#endif { s += length; } @@ -145,12 +151,20 @@ public: ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn &, size_t) override +#else + void doInsertFrom(const IColumn &, size_t) override +#endif { ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } +#else + void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } +#endif void insertDefault() override { @@ -223,7 +237,11 @@ public: return data->allocatedBytes() + sizeof(s); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override +#else + int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override +#endif { return data->compareAt(0, 0, *assert_cast(rhs).data, nan_direction_hint); } diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index eb9784c14dd..cf413f790a7 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -32,7 +32,11 @@ namespace ErrorCodes } template +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnDecimal::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const +#else +int ColumnDecimal::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const +#endif { auto & other = static_cast(rhs_); const T & a = data[n]; @@ -331,7 +335,11 @@ void ColumnDecimal::insertData(const char * src, size_t /*length*/) } template +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnDecimal::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnDecimal & src_vec = assert_cast(src); diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index c4510ba2922..32efeb643a6 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -55,9 +55,17 @@ public: void reserve(size_t n) override { data.reserve_exact(n); } void shrinkToFit() override { data.shrink_to_fit(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } +#else + void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } +#endif +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override +#endif { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); @@ -68,7 +76,11 @@ public: void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } void insert(const Field & x) override { data.push_back(x.get()); } bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void popBack(size_t n) override { @@ -92,7 +104,11 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#endif void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 3c147b6f123..c735238f515 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -4,7 +4,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -213,7 +215,11 @@ bool ColumnDynamic::tryInsert(const DB::Field & x) } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) +#else +void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) +#endif { const auto & dynamic_src = assert_cast(src_); @@ -263,7 +269,11 @@ void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +#else +void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +#endif { if (start + length > src_.size()) throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnDynamic::insertRangeFrom method. " @@ -429,7 +439,11 @@ void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#else +void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#endif { const auto & dynamic_src = assert_cast(src_); @@ -481,7 +495,7 @@ StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, co /// We cannot use Variant serialization here as it serializes discriminator + value, /// but Dynamic doesn't have fixed mapping discriminator <-> variant type /// as different Dynamic column can have different Variants. - /// Instead, we serialize null bit + variant type name (size + bytes) + value. + /// Instead, we serialize null bit + variant type in binary format (size + bytes) + value. const auto & variant_col = assert_cast(*variant_column); auto discr = variant_col.globalDiscriminatorAt(n); StringRef res; @@ -495,14 +509,15 @@ StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, co return res; } - const auto & variant_name = variant_info.variant_names[discr]; - size_t variant_name_size = variant_name.size(); - char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_name.size(), begin); + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(discr); + String variant_type_binary_data = encodeDataType(variant_type); + size_t variant_type_binary_data_size = variant_type_binary_data.size(); + char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_type_binary_data.size(), begin); memcpy(pos, &null_bit, sizeof(UInt8)); - memcpy(pos + sizeof(UInt8), &variant_name_size, sizeof(size_t)); - memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_name.data(), variant_name.size()); + memcpy(pos + sizeof(UInt8), &variant_type_binary_data_size, sizeof(size_t)); + memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_type_binary_data.data(), variant_type_binary_data.size()); res.data = pos; - res.size = sizeof(UInt8) + sizeof(size_t) + variant_name.size(); + res.size = sizeof(UInt8) + sizeof(size_t) + variant_type_binary_data.size(); auto value_ref = variant_col.getVariantByGlobalDiscriminator(discr).serializeValueIntoArena(variant_col.offsetAt(n), arena, begin); res.data = value_ref.data - res.size; @@ -521,13 +536,15 @@ const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) return pos; } - /// Read variant type name. - const size_t variant_name_size = unalignedLoad(pos); - pos += sizeof(variant_name_size); - String variant_name; - variant_name.resize(variant_name_size); - memcpy(variant_name.data(), pos, variant_name_size); - pos += variant_name_size; + /// Read variant type in binary format. + const size_t variant_type_binary_data_size = unalignedLoad(pos); + pos += sizeof(variant_type_binary_data_size); + String variant_type_binary_data; + variant_type_binary_data.resize(variant_type_binary_data_size); + memcpy(variant_type_binary_data.data(), pos, variant_type_binary_data_size); + pos += variant_type_binary_data_size; + auto variant_type = decodeDataType(variant_type_binary_data); + auto variant_name = variant_type->getName(); /// If we already have such variant, just deserialize it into corresponding variant column. auto it = variant_info.variant_name_to_discriminator.find(variant_name); if (it != variant_info.variant_name_to_discriminator.end()) @@ -537,7 +554,6 @@ const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) } /// If we don't have such variant, add it. - auto variant_type = DataTypeFactory::instance().get(variant_name); if (likely(addNewVariant(variant_type))) { auto discr = variant_info.variant_name_to_discriminator[variant_name]; @@ -563,13 +579,13 @@ const char * ColumnDynamic::skipSerializedInArena(const char * pos) const if (null_bit) return pos; - const size_t variant_name_size = unalignedLoad(pos); - pos += sizeof(variant_name_size); - String variant_name; - variant_name.resize(variant_name_size); - memcpy(variant_name.data(), pos, variant_name_size); - pos += variant_name_size; - auto tmp_variant_column = DataTypeFactory::instance().get(variant_name)->createColumn(); + const size_t variant_type_binary_data_size = unalignedLoad(pos); + pos += sizeof(variant_type_binary_data_size); + String variant_type_binary_data; + variant_type_binary_data.resize(variant_type_binary_data_size); + memcpy(variant_type_binary_data.data(), pos, variant_type_binary_data_size); + pos += variant_type_binary_data_size; + auto tmp_variant_column = decodeDataType(variant_type_binary_data)->createColumn(); return tmp_variant_column->skipSerializedInArena(pos); } @@ -587,7 +603,11 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +#else +int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +#endif { const auto & left_variant = assert_cast(*variant_column); const auto & right_dynamic = assert_cast(rhs); diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 27ad0dd583f..9abddc7a26d 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -142,9 +142,16 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertDefault() override { @@ -213,7 +220,11 @@ public: return scattered_columns; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif bool hasEqualValues() const override { diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index d7e4eff2727..1c2de203a94 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -74,7 +74,11 @@ bool ColumnFixedString::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) +#else +void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) +#endif { const ColumnFixedString & src = assert_cast(src_); @@ -86,7 +90,11 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnFixedString & src_concrete = assert_cast(src); if (n != src_concrete.getN()) @@ -219,7 +227,11 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & return elements.size(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnFixedString & src_concrete = assert_cast(src); chassert(this->n == src_concrete.n); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 7b46dc11cd6..6e88136fc50 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -98,9 +98,17 @@ public: bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t index) override; +#else + void doInsertFrom(const IColumn & src_, size_t index) override; +#endif +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertData(const char * pos, size_t length) override; @@ -129,7 +137,11 @@ public: void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#else + int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#endif { const ColumnFixedString & rhs = assert_cast(rhs_); chassert(this->n == rhs.n); @@ -144,7 +156,11 @@ public: size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index 0ab9d15ad50..fa57f35a823 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -72,7 +72,11 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFunction::insertFrom(const IColumn & src, size_t n) +#else +void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) +#endif { const ColumnFunction & src_func = assert_cast(src); @@ -89,7 +93,11 @@ void ColumnFunction::insertFrom(const IColumn & src, size_t n) ++elements_size; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnFunction & src_func = assert_cast(src); diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 6fdc6679d3e..ba924c49a82 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -94,8 +94,16 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; +#endif +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override; +#endif void insertData(const char *, size_t) override { @@ -137,7 +145,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override +#endif { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "compareAt is not implemented for {}", getName()); } diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 208326fe629..eb694a10b0f 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -159,7 +159,11 @@ void ColumnLowCardinality::insertDefault() idx.insertPosition(getDictionary().getDefaultValueIndex()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) +#else +void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n) +#endif { const auto * low_cardinality_src = typeid_cast(&src); @@ -187,7 +191,11 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) idx.insertPosition(getDictionary().uniqueInsertFrom(src, n)); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const auto * low_cardinality_src = typeid_cast(&src); @@ -364,7 +372,11 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs, nan_direction_hint); } diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index ac3b725b22f..e99be07cd8d 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -78,10 +78,18 @@ public: bool tryInsert(const Field & x) override; void insertDefault() override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; +#endif void insertFromFullColumn(const IColumn & src, size_t n); +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length); void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions); @@ -127,7 +135,11 @@ public: return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit)); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index eecea1a273f..2dffddb2dc9 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -153,17 +153,29 @@ void ColumnMap::updateHashFast(SipHash & hash) const nested->updateHashFast(hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnMap::insertFrom(const IColumn & src, size_t n) +#else +void ColumnMap::doInsertFrom(const IColumn & src, size_t n) +#endif { nested->insertFrom(assert_cast(src).getNestedColumn(), n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { nested->insertRangeFrom( assert_cast(src).getNestedColumn(), @@ -210,7 +222,11 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele return res; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { const auto & rhs_map = assert_cast(rhs); return nested->compareAt(n, m, rhs_map.getNestedColumn(), nan_direction_hint); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 52165d0d74e..a54071a2974 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -66,16 +66,28 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 1d12a59fd59..f060e74b315 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -221,7 +221,11 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const return pos; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnNullable & nullable_col = assert_cast(src); getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length); @@ -258,7 +262,11 @@ bool ColumnNullable::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnNullable::insertFrom(const IColumn & src, size_t n) +#else +void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) +#endif { const ColumnNullable & src_concrete = assert_cast(src); getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n); @@ -266,7 +274,11 @@ void ColumnNullable::insertFrom(const IColumn & src, size_t n) } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnNullable & src_concrete = assert_cast(src); getNestedColumn().insertManyFrom(src_concrete.getNestedColumn(), position, length); @@ -402,7 +414,11 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#else +int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#endif { return compareAtImpl(n, m, rhs_, null_direction_hint); } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 510a4cacf1e..a6d0483e527 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -69,11 +69,21 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertFromNotNullable(const IColumn & src, size_t n); void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length); @@ -90,7 +100,11 @@ public: void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#endif #if USE_EMBEDDED_COMPILER diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index ded56b60e64..adcd42b16e9 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -763,12 +763,20 @@ void ColumnObject::get(size_t n, Field & res) const } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnObject::insertFrom(const IColumn & src, size_t n) +#else +void ColumnObject::doInsertFrom(const IColumn & src, size_t n) +#endif { insert(src[n]); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const auto & src_object = assert_cast(src); @@ -1093,10 +1101,4 @@ void ColumnObject::finalize() checkObjectHasNoAmbiguosPaths(getKeys()); } -void ColumnObject::updateHashFast(SipHash & hash) const -{ - for (const auto & entry : subcolumns) - for (auto & part : entry->data.data) - part->updateHashFast(hash); -} } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index b1b8827622f..fadf2e18779 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -209,8 +209,15 @@ public: void insert(const Field & field) override; bool tryInsert(const Field & field) override; void insertDefault() override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif + void popBack(size_t length) override; Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; @@ -228,7 +235,11 @@ public: /// Order of rows in ColumnObject is undefined. void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#endif void getExtremes(Field & min, Field & max) const override; /// All other methods throw exception. @@ -242,7 +253,7 @@ public: const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } - void updateHashFast(SipHash & hash) const override; + void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 5190ceb49e5..809586d8810 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -174,7 +174,11 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const return values->skipSerializedInArena(pos); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; @@ -248,7 +252,11 @@ bool ColumnSparse::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnSparse::insertFrom(const IColumn & src, size_t n) +#else +void ColumnSparse::doInsertFrom(const IColumn & src, size_t n) +#endif { if (const auto * src_sparse = typeid_cast(&src)) { @@ -446,7 +454,11 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray & indexes, size_t l return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#else +int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#endif { if (const auto * rhs_sparse = typeid_cast(&rhs_)) return values->compareAt(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint); diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 12b2def7cf1..3e34d1de94a 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -81,10 +81,18 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char *) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; +#endif void insertDefault() override; void insertManyDefaults(size_t length) override; @@ -98,7 +106,11 @@ public: template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#endif void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index a84aea73486..1eda9714d62 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -39,7 +39,11 @@ ColumnString::ColumnString(const ColumnString & src) last_offset, chars.size()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnString & src_concrete = assert_cast(src); const UInt8 * src_buf = &src_concrete.chars[src_concrete.offsets[position - 1]]; @@ -129,7 +133,11 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 39d4684fd89..602ffac65e8 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -142,7 +142,11 @@ public: return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override +#else + void doInsertFrom(const IColumn & src_, size_t n) override +#endif { const ColumnString & src = assert_cast(src_); const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray. @@ -165,7 +169,11 @@ public: } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertData(const char * pos, size_t length) override { @@ -212,7 +220,11 @@ public: hash.update(reinterpret_cast(chars.data()), chars.size() * sizeof(chars[0])); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -238,7 +250,11 @@ public: offsets.push_back(offsets.back() + 1); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#endif { const ColumnString & rhs = assert_cast(rhs_); return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 2159495b68f..9b822d7f570 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -205,7 +205,11 @@ bool ColumnTuple::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnTuple::insertFrom(const IColumn & src_, size_t n) +#else +void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) +#endif { const ColumnTuple & src = assert_cast(src_); @@ -218,7 +222,11 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n) columns[i]->insertFrom(*src.columns[i], n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnTuple & src_tuple = assert_cast(src); @@ -318,7 +326,11 @@ void ColumnTuple::updateHashFast(SipHash & hash) const column->updateHashFast(hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { column_length += length; const size_t tuple_size = columns.size(); @@ -470,7 +482,11 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_ return 0; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs, nan_direction_hint); } @@ -711,7 +727,13 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c ColumnPtr ColumnTuple::compress() const { if (columns.empty()) - return Ptr(); + { + return ColumnCompressed::create(size(), 0, + [n = column_length] + { + return ColumnTuple::create(n); + }); + } size_t byte_size = 0; Columns compressed; diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 0103f81b242..38e479791d4 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -65,8 +65,15 @@ public: void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif + void insertDefault() override; void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; @@ -76,14 +83,22 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 0311efd4c83..ec1f8e0a4d5 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -90,7 +90,11 @@ public: return getNestedColumn()->updateHashWithValue(n, hash_func); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); } bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); } @@ -488,7 +492,11 @@ const char * ColumnUnique::skipSerializedInArena(const char *) const } template +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnUnique::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { if (is_nullable) { diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index ec47f5dfa74..ee5de4c2dde 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -595,17 +595,29 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +#else +void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) +#endif { insertFromImpl(src_, n, nullptr); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +#else +void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) +#endif { insertRangeFromImpl(src_, start, length, nullptr); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#else +void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#endif { insertManyFromImpl(src_, position, length, nullptr); } @@ -1174,7 +1186,11 @@ bool ColumnVariant::hasEqualValues() const return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { const auto & rhs_variant = assert_cast(rhs); Discriminator left_discr = globalDiscriminatorAt(n); diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index e5a4498f340..d91b8e93a7d 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -180,9 +180,19 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; +#else + using IColumn::insertFrom; + using IColumn::insertManyFrom; + using IColumn::insertRangeFrom; + + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) override; + void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override; +#endif /// Methods for insertion from another Variant but with known mapping between global discriminators. void insertFrom(const IColumn & src_, size_t n, const std::vector & global_discriminators_mapping); @@ -213,7 +223,11 @@ public: ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; ColumnPtr replicate(const Offsets & replicate_offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif bool hasEqualValues() const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 35d9f5386ed..19d1b800961 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -503,7 +503,11 @@ bool ColumnVector::tryInsert(const DB::Field & x) } template +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnVector::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnVector & src_vec = assert_cast(src); diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index bbd27c91a70..3a0acf5e312 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -64,12 +64,20 @@ public: return data.size(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override +#else + void doInsertFrom(const IColumn & src, size_t n) override +#endif { data.push_back(assert_cast(src).getData()[n]); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override +#endif { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); @@ -142,7 +150,11 @@ public: } /// This method implemented in header because it could be possibly devirtualized. +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override +#endif { return CompareHelper::compare(data[n], assert_cast(rhs_).data[m], nan_direction_hint); } @@ -228,7 +240,11 @@ public: bool tryInsert(const DB::Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 90cccef2b03..552e52cf51c 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -46,7 +46,11 @@ String IColumn::dumpStructure() const return res.str(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void IColumn::insertFrom(const IColumn & src, size_t n) +#else +void IColumn::doInsertFrom(const IColumn & src, size_t n) +#endif { insert(src[n]); } diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index afa301d5c1c..4b6f34e5aa2 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -1,15 +1,14 @@ #pragma once -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include #include "config.h" - class SipHash; class Collator; @@ -180,18 +179,42 @@ public: /// Appends n-th element from other column with the same type. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. +#if !defined(ABORT_ON_LOGICAL_ERROR) virtual void insertFrom(const IColumn & src, size_t n); +#else + void insertFrom(const IColumn & src, size_t n) + { + assertTypeEquality(src); + doInsertFrom(src, n); + } +#endif /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. +#if !defined(ABORT_ON_LOGICAL_ERROR) virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; +#else + void insertRangeFrom(const IColumn & src, size_t start, size_t length) + { + assertTypeEquality(src); + doInsertRangeFrom(src, start, length); + } +#endif /// Appends one element from other column with the same type multiple times. +#if !defined(ABORT_ON_LOGICAL_ERROR) virtual void insertManyFrom(const IColumn & src, size_t position, size_t length) { for (size_t i = 0; i < length; ++i) insertFrom(src, position); } +#else + void insertManyFrom(const IColumn & src, size_t position, size_t length) + { + assertTypeEquality(src); + doInsertManyFrom(src, position, length); + } +#endif /// Appends one field multiple times. Can be optimized in inherited classes. virtual void insertMany(const Field & field, size_t length) @@ -322,7 +345,15 @@ public: * * For non Nullable and non floating point types, nan_direction_hint is ignored. */ +#if !defined(ABORT_ON_LOGICAL_ERROR) [[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; +#else + [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const + { + assertTypeEquality(rhs); + return doCompareAt(n, m, rhs, nan_direction_hint); + } +#endif #if USE_EMBEDDED_COMPILER @@ -610,6 +641,8 @@ public: [[nodiscard]] virtual bool isSparse() const { return false; } + [[nodiscard]] virtual bool isConst() const { return false; } + [[nodiscard]] virtual bool isCollationSupported() const { return false; } virtual ~IColumn() = default; @@ -633,6 +666,29 @@ protected: Equals equals, Sort full_sort, PartialSort partial_sort) const; + +#if defined(ABORT_ON_LOGICAL_ERROR) + virtual void doInsertFrom(const IColumn & src, size_t n); + + virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; + + virtual void doInsertManyFrom(const IColumn & src, size_t position, size_t length) + { + for (size_t i = 0; i < length; ++i) + insertFrom(src, position); + } + + virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; + +private: + void assertTypeEquality(const IColumn & rhs) const + { + /// For Sparse and Const columns, we can compare only internal types. It is considered normal to e.g. insert from normal vector column to a sparse vector column. + /// This case is specifically handled in ColumnSparse implementation. Similar situation with Const column. + /// For the rest of column types we can compare the types directly. + chassert((isConst() || isSparse()) ? getDataType() == rhs.getDataType() : typeid(*this) == typeid(rhs)); + } +#endif }; using ColumnPtr = IColumn::Ptr; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 27f420fbc71..c19fb704d9b 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -26,7 +26,11 @@ public: size_t byteSize() const override { return 0; } size_t byteSizeAt(size_t) const override { return 0; } size_t allocatedBytes() const override { return 0; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#endif void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { } @@ -67,12 +71,20 @@ public: { } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn &, size_t) override +#else + void doInsertFrom(const IColumn &, size_t) override +#endif { ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override +#else + void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override +#endif { s += length; } diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index f71f19a5da6..3398452b7ee 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -85,7 +85,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique."); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t, size_t) override +#else + void doInsertRangeFrom(const IColumn &, size_t, size_t) override +#endif { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertRangeFrom is not supported for ColumnUnique."); } diff --git a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp index 325cf5559cd..645f6ed79f3 100644 --- a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp +++ b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp @@ -52,7 +52,11 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows) } +#if !defined(ABORT_ON_LOGICAL_ERROR) static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) +#else +static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src) +#endif { size_t size = src.size(); dst.insertManyFrom(src, size / 2, size); diff --git a/src/Common/AtomicLogger.h b/src/Common/AtomicLogger.h index 0ece9e8a09a..c1bbdb41866 100644 --- a/src/Common/AtomicLogger.h +++ b/src/Common/AtomicLogger.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include diff --git a/src/Common/BinStringDecodeHelper.h b/src/Common/BinStringDecodeHelper.h index df3e014cfad..03c175fd37f 100644 --- a/src/Common/BinStringDecodeHelper.h +++ b/src/Common/BinStringDecodeHelper.h @@ -5,7 +5,7 @@ namespace DB { -static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2) +static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size) { if ((end - pos) & 1) { @@ -23,7 +23,7 @@ static void inline hexStringDecode(const char * pos, const char * end, char *& o ++out; } -static void inline binStringDecode(const char * pos, const char * end, char *& out) +static void inline binStringDecode(const char * pos, const char * end, char *& out, size_t word_size) { if (pos == end) { @@ -53,7 +53,7 @@ static void inline binStringDecode(const char * pos, const char * end, char *& o ++out; } - assert((end - pos) % 8 == 0); + chassert((end - pos) % word_size == 0); while (end - pos != 0) { diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h deleted file mode 100644 index 97c0c3fbc06..00000000000 --- a/src/Common/CollectionOfDerived.h +++ /dev/null @@ -1,184 +0,0 @@ -#pragma once - -#include - -#include - -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -/* This is a collections of objects derived from ItemBase. -* Collection contains no more than one instance for each derived type. -* The derived type is used to access the instance. -*/ - -template -class CollectionOfDerivedItems -{ -public: - using Self = CollectionOfDerivedItems; - using ItemPtr = std::shared_ptr; - -private: - struct Rec - { - std::type_index type_idx; - ItemPtr ptr; - - bool operator<(const Rec & other) const - { - return type_idx < other.type_idx; - } - - bool operator<(const std::type_index & value) const - { - return type_idx < value; - } - - bool operator==(const Rec & other) const - { - return type_idx == other.type_idx; - } - }; - using Records = std::vector; - -public: - void swap(Self & other) noexcept - { - records.swap(other.records); - } - - void clear() - { - records.clear(); - } - - bool empty() const - { - return records.empty(); - } - - size_t size() const - { - return records.size(); - } - - Self clone() const - { - Self result; - result.records.reserve(records.size()); - for (const auto & rec : records) - result.records.emplace_back(rec.type_idx, rec.ptr->clone()); - return result; - } - - void append(Self && other) - { - auto middle_idx = records.size(); - std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); - std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end()); - chassert(isUniqTypes()); - } - - template - void add(std::shared_ptr info) - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - return addImpl(std::type_index(typeid(T)), std::move(info)); - } - - template - std::shared_ptr get() const - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - auto it = getImpl(std::type_index(typeid(T))); - if (it == records.cend()) - return nullptr; - auto cast = std::dynamic_pointer_cast(it->ptr); - chassert(cast); - return cast; - } - - template - std::shared_ptr extract() - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - auto it = getImpl(std::type_index(typeid(T))); - if (it == records.cend()) - return nullptr; - auto cast = std::dynamic_pointer_cast(it->ptr); - chassert(cast); - - records.erase(it); - return cast; - } - - std::string debug() const - { - std::string result; - - for (auto & rec : records) - { - result.append(rec.type_idx.name()); - result.append(" "); - } - - return result; - } - -private: - bool isUniqTypes() const - { - auto uniq_it = std::adjacent_find(records.begin(), records.end()); - - return uniq_it == records.end(); - } - - void addImpl(std::type_index type_idx, ItemPtr item) - { - auto it = std::lower_bound(records.begin(), records.end(), type_idx); - - if (it == records.end()) - { - records.emplace_back(type_idx, item); - return; - } - - if (it->type_idx == type_idx) - throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name()); - - - records.emplace(it, type_idx, item); - - chassert(isUniqTypes()); - } - - Records::const_iterator getImpl(std::type_index type_idx) const - { - auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); - - if (it == records.cend()) - return records.cend(); - - if (it->type_idx != type_idx) - return records.cend(); - - return it; - } - - Records records; -}; - -} diff --git a/src/Common/ConcurrencyControl.h b/src/Common/ConcurrencyControl.h index ba94502962c..9d35d7cb8b0 100644 --- a/src/Common/ConcurrencyControl.h +++ b/src/Common/ConcurrencyControl.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 8516a88c7af..7c97e73f278 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -267,7 +267,7 @@ M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \ M(S3Requests, "S3 requests count") \ M(KeeperAliveConnections, "Number of alive connections") \ - M(KeeperOutstandingRequets, "Number of outstanding requests") \ + M(KeeperOutstandingRequests, "Number of outstanding requests") \ M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \ M(IOUringPendingEvents, "Number of io_uring SQEs waiting to be submitted") \ M(IOUringInFlightEvents, "Number of io_uring SQEs in flight") \ diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 1f4b0aea8f2..111280074dd 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,10 +38,19 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } +void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size) +{ + auto & logger = Poco::Logger::root(); + LOG_FATAL(&logger, "Logical error: '{}'.", description); + if (trace) + LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace, trace_offset, trace_size)); + abort(); +} + void abortOnFailedAssertion(const String & description) { - LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description); - abort(); + StackTrace st; + abortOnFailedAssertion(description, st.getFramePointers().data(), st.getOffset(), st.getSize()); } bool terminate_on_any_exception = false; @@ -58,7 +67,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc #ifdef ABORT_ON_LOGICAL_ERROR if (code == ErrorCodes::LOGICAL_ERROR) { - abortOnFailedAssertion(msg); + abortOnFailedAssertion(msg, trace.data(), 0, trace.size()); } #endif diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 87ef7101cdc..a4774a89f6a 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -25,8 +25,6 @@ namespace DB class AtomicLogger; -[[noreturn]] void abortOnFailedAssertion(const String & description); - /// This flag can be set for testing purposes - to check that no exceptions are thrown. extern bool terminate_on_any_exception; @@ -167,6 +165,8 @@ protected: mutable std::vector capture_thread_frame_pointers; }; +[[noreturn]] void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size); +[[noreturn]] void abortOnFailedAssertion(const String & description); std::string getExceptionStackTraceString(const std::exception & e); std::string getExceptionStackTraceString(std::exception_ptr e); diff --git a/src/Common/FieldBinaryEncoding.cpp b/src/Common/FieldBinaryEncoding.cpp new file mode 100644 index 00000000000..6c1a8496fe6 --- /dev/null +++ b/src/Common/FieldBinaryEncoding.cpp @@ -0,0 +1,389 @@ +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int INCORRECT_DATA; +} + +namespace +{ + +enum class FieldBinaryTypeIndex: uint8_t +{ + Null = 0x00, + UInt64 = 0x01, + Int64 = 0x02, + UInt128 = 0x03, + Int128 = 0x04, + UInt256 = 0x05, + Int256 = 0x06, + Float64 = 0x07, + Decimal32 = 0x08, + Decimal64 = 0x09, + Decimal128 = 0x0A, + Decimal256 = 0x0B, + String = 0x0C, + Array = 0x0D, + Tuple = 0x0E, + Map = 0x0F, + IPv4 = 0x10, + IPv6 = 0x11, + UUID = 0x12, + Bool = 0x13, + Object = 0x14, + AggregateFunctionState = 0x15, + + NegativeInfinity = 0xFE, + PositiveInfinity = 0xFF, +}; + +class FieldVisitorEncodeBinary +{ +public: + void operator() (const Null & x, WriteBuffer & buf) const; + void operator() (const UInt64 & x, WriteBuffer & buf) const; + void operator() (const UInt128 & x, WriteBuffer & buf) const; + void operator() (const UInt256 & x, WriteBuffer & buf) const; + void operator() (const Int64 & x, WriteBuffer & buf) const; + void operator() (const Int128 & x, WriteBuffer & buf) const; + void operator() (const Int256 & x, WriteBuffer & buf) const; + void operator() (const UUID & x, WriteBuffer & buf) const; + void operator() (const IPv4 & x, WriteBuffer & buf) const; + void operator() (const IPv6 & x, WriteBuffer & buf) const; + void operator() (const Float64 & x, WriteBuffer & buf) const; + void operator() (const String & x, WriteBuffer & buf) const; + void operator() (const Array & x, WriteBuffer & buf) const; + void operator() (const Tuple & x, WriteBuffer & buf) const; + void operator() (const Map & x, WriteBuffer & buf) const; + void operator() (const Object & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const; + [[noreturn]] void operator() (const CustomType & x, WriteBuffer & buf) const; + void operator() (const bool & x, WriteBuffer & buf) const; +}; + +void FieldVisitorEncodeBinary::operator() (const Null & x, WriteBuffer & buf) const +{ + if (x.isNull()) + writeBinary(UInt8(FieldBinaryTypeIndex::Null), buf); + else if (x.isPositiveInfinity()) + writeBinary(UInt8(FieldBinaryTypeIndex::PositiveInfinity), buf); + else if (x.isNegativeInfinity()) + writeBinary(UInt8(FieldBinaryTypeIndex::NegativeInfinity), buf); +} + +void FieldVisitorEncodeBinary::operator() (const UInt64 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UInt64), buf); + writeVarUInt(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Int64 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Int64), buf); + writeVarInt(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Float64 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Float64), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const String & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::String), buf); + writeStringBinary(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const UInt128 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UInt128), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Int128 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Int128), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const UInt256 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UInt256), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Int256 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Int256), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const UUID & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UUID), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const IPv4 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::IPv4), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const IPv6 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::IPv6), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal32), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal64), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal128), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal256), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::AggregateFunctionState), buf); + writeStringBinary(x.name, buf); + writeStringBinary(x.data, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Array & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Array), buf); + size_t size = x.size(); + writeVarUInt(size, buf); + for (size_t i = 0; i < size; ++i) + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, x[i]); +} + +void FieldVisitorEncodeBinary::operator() (const Tuple & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Tuple), buf); + size_t size = x.size(); + writeVarUInt(size, buf); + for (size_t i = 0; i < size; ++i) + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, x[i]); +} + +void FieldVisitorEncodeBinary::operator() (const Map & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Map), buf); + size_t size = x.size(); + writeVarUInt(size, buf); + for (size_t i = 0; i < size; ++i) + { + const Tuple & key_and_value = x[i].get(); + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[0]); + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[1]); + } +} + +void FieldVisitorEncodeBinary::operator() (const Object & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Object), buf); + + size_t size = x.size(); + writeVarUInt(size, buf); + for (const auto & [key, value] : x) + { + writeStringBinary(key, buf); + Field::dispatch([&buf] (const auto & val) { FieldVisitorEncodeBinary()(val, buf); }, value); + } +} + +void FieldVisitorEncodeBinary::operator()(const bool & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Bool), buf); + writeBinary(static_cast(x), buf); +} + +[[noreturn]] void FieldVisitorEncodeBinary::operator()(const CustomType &, WriteBuffer &) const +{ + /// TODO: Support binary encoding/decoding for custom types somehow. + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of Field with custom type is not supported"); +} + +template +Field decodeBigInteger(ReadBuffer & buf) +{ + T value; + readBinaryLittleEndian(value, buf); + return value; +} + +template +DecimalField decodeDecimal(ReadBuffer & buf) +{ + UInt32 scale; + readVarUInt(scale, buf); + T value; + readBinaryLittleEndian(value, buf); + return DecimalField(value, scale); +} + +template +T decodeValueLittleEndian(ReadBuffer & buf) +{ + T value; + readBinaryLittleEndian(value, buf); + return value; +} + +template +T decodeArrayLikeField(ReadBuffer & buf) +{ + size_t size; + readVarUInt(size, buf); + T value; + for (size_t i = 0; i != size; ++i) + value.push_back(decodeField(buf)); + return value; +} + +} +void encodeField(const Field & x, WriteBuffer & buf) +{ + Field::dispatch([&buf] (const auto & val) { FieldVisitorEncodeBinary()(val, buf); }, x); +} + +Field decodeField(ReadBuffer & buf) +{ + UInt8 type; + readBinary(type, buf); + switch (FieldBinaryTypeIndex(type)) + { + case FieldBinaryTypeIndex::Null: + return Null(); + case FieldBinaryTypeIndex::PositiveInfinity: + return POSITIVE_INFINITY; + case FieldBinaryTypeIndex::NegativeInfinity: + return NEGATIVE_INFINITY; + case FieldBinaryTypeIndex::Int64: + { + Int64 value; + readVarInt(value, buf); + return value; + } + case FieldBinaryTypeIndex::UInt64: + { + UInt64 value; + readVarUInt(value, buf); + return value; + } + case FieldBinaryTypeIndex::Int128: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::UInt128: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::Int256: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::UInt256: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::Float64: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::Decimal32: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::Decimal64: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::Decimal128: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::Decimal256: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::String: + { + String value; + readStringBinary(value, buf); + return value; + } + case FieldBinaryTypeIndex::UUID: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::IPv4: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::IPv6: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::Bool: + { + bool value; + readBinary(value, buf); + return value; + } + case FieldBinaryTypeIndex::Array: + return decodeArrayLikeField(buf); + case FieldBinaryTypeIndex::Tuple: + return decodeArrayLikeField(buf); + case FieldBinaryTypeIndex::Map: + { + size_t size; + readVarUInt(size, buf); + Map map; + for (size_t i = 0; i != size; ++i) + { + Tuple key_and_value; + key_and_value.push_back(decodeField(buf)); + key_and_value.push_back(decodeField(buf)); + map.push_back(key_and_value); + } + return map; + } + case FieldBinaryTypeIndex::Object: + { + size_t size; + readVarUInt(size, buf); + Object value; + for (size_t i = 0; i != size; ++i) + { + String name; + readStringBinary(name, buf); + value[name] = decodeField(buf); + } + return value; + } + case FieldBinaryTypeIndex::AggregateFunctionState: + { + String name; + readStringBinary(name, buf); + String data; + readStringBinary(data, buf); + return AggregateFunctionStateData{.name = name, .data = data}; + } + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown Field type: {0:#04x}", UInt64(type)); +} + +} diff --git a/src/Common/FieldBinaryEncoding.h b/src/Common/FieldBinaryEncoding.h new file mode 100644 index 00000000000..aa6694cb03e --- /dev/null +++ b/src/Common/FieldBinaryEncoding.h @@ -0,0 +1,43 @@ +#pragma once + +#include + +namespace DB +{ + +/** +Binary encoding for Fields: +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| Field type | Binary encoding | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| `Null` | `0x00` | +| `UInt64` | `0x01` | +| `Int64` | `0x02` | +| `UInt128` | `0x03` | +| `Int128` | `0x04` | +| `UInt128` | `0x05` | +| `Int128` | `0x06` | +| `Float64` | `0x07` | +| `Decimal32` | `0x08` | +| `Decimal64` | `0x09` | +| `Decimal128` | `0x0A` | +| `Decimal256` | `0x0B` | +| `String` | `0x0C` | +| `Array` | `0x0D...` | +| `Tuple` | `0x0E...` | +| `Map` | `0x0F...` | +| `IPv4` | `0x10` | +| `IPv6` | `0x11` | +| `UUID` | `0x12` | +| `Bool` | `0x13` | +| `Object` | `0x14...` | +| `AggregateFunctionState` | `0x15` | +| `Negative infinity` | `0xFE` | +| `Positive infinity` | `0xFF` | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +*/ + +void encodeField(const Field &, WriteBuffer & buf); +Field decodeField(ReadBuffer & buf); + +} diff --git a/src/Common/IntervalKind.h b/src/Common/IntervalKind.h index f8e1fe87276..d66ea6018d4 100644 --- a/src/Common/IntervalKind.h +++ b/src/Common/IntervalKind.h @@ -7,19 +7,20 @@ namespace DB /// Kind of a temporal interval. struct IntervalKind { + /// note: The order and numbers are important and used in binary encoding, append new interval kinds to the end of list. enum class Kind : uint8_t { - Nanosecond, - Microsecond, - Millisecond, - Second, - Minute, - Hour, - Day, - Week, - Month, - Quarter, - Year, + Nanosecond = 0x00, + Microsecond = 0x01, + Millisecond = 0x02, + Second = 0x03, + Minute = 0x04, + Hour = 0x05, + Day = 0x06, + Week = 0x07, + Month = 0x08, + Quarter = 0x09, + Year = 0x0A, }; Kind kind = Kind::Second; diff --git a/src/Common/JSONParsers/SimdJSONParser.h b/src/Common/JSONParsers/SimdJSONParser.h index 827d142266a..db679b14f52 100644 --- a/src/Common/JSONParsers/SimdJSONParser.h +++ b/src/Common/JSONParsers/SimdJSONParser.h @@ -14,6 +14,7 @@ namespace DB { + namespace ErrorCodes { extern const int CANNOT_ALLOCATE_MEMORY; diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.cpp b/src/Common/NamedCollections/NamedCollectionsFactory.cpp index 14105a8651d..2faea1957ba 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.cpp +++ b/src/Common/NamedCollections/NamedCollectionsFactory.cpp @@ -235,7 +235,7 @@ bool NamedCollectionFactory::loadIfNot(std::lock_guard & lock) loadFromConfig(context->getConfigRef(), lock); loadFromSQL(lock); - if (metadata_storage->supportsPeriodicUpdate()) + if (metadata_storage->isReplicated()) { update_task = context->getSchedulePool().createTask("NamedCollectionsMetadataStorage", [this]{ updateFunc(); }); update_task->activate(); @@ -357,6 +357,13 @@ void NamedCollectionFactory::reloadFromSQL() add(std::move(collections), lock); } +bool NamedCollectionFactory::usesReplicatedStorage() +{ + std::lock_guard lock(mutex); + loadIfNot(lock); + return metadata_storage->isReplicated(); +} + void NamedCollectionFactory::updateFunc() { LOG_TRACE(log, "Named collections background updating thread started"); diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.h b/src/Common/NamedCollections/NamedCollectionsFactory.h index 6ee5940e686..a0721ad8a50 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.h +++ b/src/Common/NamedCollections/NamedCollectionsFactory.h @@ -34,6 +34,8 @@ public: void updateFromSQL(const ASTAlterNamedCollectionQuery & query); + bool usesReplicatedStorage(); + void loadIfNot(); void shutdown(); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index 32fdb25abd3..b3671350f92 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -67,7 +67,7 @@ public: virtual bool removeIfExists(const std::string & path) = 0; - virtual bool supportsPeriodicUpdate() const = 0; + virtual bool isReplicated() const = 0; virtual bool waitUpdate(size_t /* timeout */) { return false; } }; @@ -89,7 +89,7 @@ public: ~LocalStorage() override = default; - bool supportsPeriodicUpdate() const override { return false; } + bool isReplicated() const override { return false; } std::vector list() const override { @@ -221,7 +221,7 @@ public: ~ZooKeeperStorage() override = default; - bool supportsPeriodicUpdate() const override { return true; } + bool isReplicated() const override { return true; } /// Return true if children changed. bool waitUpdate(size_t timeout) override @@ -465,14 +465,14 @@ void NamedCollectionsMetadataStorage::writeCreateQuery(const ASTCreateNamedColle storage->write(getFileName(query.collection_name), serializeAST(*normalized_query), replace); } -bool NamedCollectionsMetadataStorage::supportsPeriodicUpdate() const +bool NamedCollectionsMetadataStorage::isReplicated() const { - return storage->supportsPeriodicUpdate(); + return storage->isReplicated(); } bool NamedCollectionsMetadataStorage::waitUpdate() { - if (!storage->supportsPeriodicUpdate()) + if (!storage->isReplicated()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Periodic updates are not supported"); const auto & config = Context::getGlobalContextInstance()->getConfigRef(); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h index 3c089fe2fa2..c3468fbc468 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h @@ -30,7 +30,7 @@ public: /// Return true if update was made bool waitUpdate(); - bool supportsPeriodicUpdate() const; + bool isReplicated() const; private: class INamedCollectionsStorage; diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 2359137012c..3d4de773a36 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -28,7 +28,6 @@ namespace ErrorCodes namespace ProfileEvents { - extern const Event DistributedConnectionFailTry; extern const Event DistributedConnectionFailAtAll; extern const Event DistributedConnectionSkipReadOnlyReplica; } @@ -285,7 +284,6 @@ PoolWithFailoverBase::getMany( else { LOG_WARNING(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1); diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 439965a92fb..b28c1b2e87e 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -568,6 +568,7 @@ The server successfully detected this situation and will download merged part fr M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \ M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \ M(AggregationOptimizedEqualRangesOfKeys, "For how many blocks optimization of equal ranges of keys was applied") \ + M(HashJoinPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for hash join.") \ \ M(MetadataFromKeeperCacheHit, "Number of times an object storage metadata request was answered from cache without making request to Keeper") \ M(MetadataFromKeeperCacheMiss, "Number of times an object storage metadata request had to be answered from Keeper") \ diff --git a/src/Common/RemoteHostFilter.h b/src/Common/RemoteHostFilter.h index 2b91306f405..4c8983205fa 100644 --- a/src/Common/RemoteHostFilter.h +++ b/src/Common/RemoteHostFilter.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 239e957bdfe..34f6f0b7535 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -545,7 +545,7 @@ std::string StackTrace::toString() const return toStringCached(frame_pointers, offset, size); } -std::string StackTrace::toString(void ** frame_pointers_raw, size_t offset, size_t size) +std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offset, size_t size) { __msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw)); diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 4ce9a9281f3..2078828f3d7 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -59,7 +59,7 @@ public: const FramePointers & getFramePointers() const { return frame_pointers; } std::string toString() const; - static std::string toString(void ** frame_pointers, size_t offset, size_t size); + static std::string toString(void * const * frame_pointers, size_t offset, size_t size); static void dropCache(); /// @param fatal - if true, will process inline frames (slower) diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 410576c2b4a..c133e9f5617 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -31,8 +31,10 @@ target_link_libraries (arena_with_free_lists PRIVATE dbms) clickhouse_add_executable (lru_hash_map_perf lru_hash_map_perf.cpp) target_link_libraries (lru_hash_map_perf PRIVATE dbms) -clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp) -target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) +if (OS_LINUX) + clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp) + target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) +endif() clickhouse_add_executable (array_cache array_cache.cpp) target_link_libraries (array_cache PRIVATE clickhouse_common_io) diff --git a/src/Common/mysqlxx/Pool.cpp b/src/Common/mysqlxx/Pool.cpp index cc5b18214c8..546e9e91dc7 100644 --- a/src/Common/mysqlxx/Pool.cpp +++ b/src/Common/mysqlxx/Pool.cpp @@ -228,7 +228,6 @@ Pool::Entry Pool::tryGet() for (auto connection_it = connections.cbegin(); connection_it != connections.cend();) { Connection * connection_ptr = *connection_it; - /// Fixme: There is a race condition here b/c we do not synchronize with Pool::Entry's copy-assignment operator if (connection_ptr->ref_count == 0) { { diff --git a/src/Common/mysqlxx/mysqlxx/Pool.h b/src/Common/mysqlxx/mysqlxx/Pool.h index 6e509d8bdd6..f1ef81e28dd 100644 --- a/src/Common/mysqlxx/mysqlxx/Pool.h +++ b/src/Common/mysqlxx/mysqlxx/Pool.h @@ -64,17 +64,6 @@ public: decrementRefCount(); } - Entry & operator= (const Entry & src) /// NOLINT - { - pool = src.pool; - if (data) - decrementRefCount(); - data = src.data; - if (data) - incrementRefCount(); - return * this; - } - bool isNull() const { return data == nullptr; diff --git a/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp index 61d6a117285..121767edc84 100644 --- a/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp +++ b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp @@ -13,13 +13,11 @@ mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool) constexpr size_t max_tries = 3; - mysqlxx::Pool::Entry worker_connection; - for (size_t try_no = 1; try_no <= max_tries; ++try_no) { try { - worker_connection = connections_pool.tryGet(); + mysqlxx::Pool::Entry worker_connection = connections_pool.tryGet(); if (!worker_connection.isNull()) { diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index c9b45d9a344..0f833c17e1b 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 2a53bade62f..e3289982b0d 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -2,9 +2,11 @@ #include "config.h" +#include #include #include #include +#include #include namespace DB diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index b4241235cc7..7589e3393be 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -372,7 +372,7 @@ extern const std::vector keeper_profile_events M(AsynchronousReadWait) \ M(S3Requests) \ M(KeeperAliveConnections) \ - M(KeeperOutstandingRequets) \ + M(KeeperOutstandingRequests) \ M(ThreadsInOvercommitTracker) \ M(IOUringPendingEvents) \ M(IOUringInFlightEvents) \ diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index f36b1ef151f..fc72e5fab59 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -6,6 +6,7 @@ #include #include "Common/ZooKeeper/IKeeper.h" +#include "Common/ZooKeeper/ZooKeeperCommon.h" #include #include #include @@ -28,7 +29,7 @@ namespace CurrentMetrics { extern const Metric KeeperAliveConnections; - extern const Metric KeeperOutstandingRequets; + extern const Metric KeeperOutstandingRequests; } namespace ProfileEvents @@ -139,7 +140,7 @@ void KeeperDispatcher::requestThread() { if (requests_queue->tryPop(request, max_wait)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); if (shutdown_called) break; @@ -171,7 +172,7 @@ void KeeperDispatcher::requestThread() /// Trying to get batch requests as fast as possible if (requests_queue->tryPop(request)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); /// Don't append read request into batch, we have to process them separately if (!coordination_settings->quorum_reads && request.request->isReadRequest()) { @@ -320,7 +321,7 @@ void KeeperDispatcher::responseThread() try { - setResponse(response_for_session.session_id, response_for_session.response); + setResponse(response_for_session.session_id, response_for_session.response, response_for_session.request); } catch (...) { @@ -355,7 +356,7 @@ void KeeperDispatcher::snapshotThread() } } -void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response) +void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr request) { std::lock_guard lock(session_to_response_callback_mutex); @@ -369,7 +370,7 @@ void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKe return; auto callback = new_session_id_response_callback[session_id_resp.internal_id]; - callback(response); + callback(response, request); new_session_id_response_callback.erase(session_id_resp.internal_id); } else /// Normal response, just write to client @@ -380,7 +381,7 @@ void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKe if (session_response_callback == session_to_response_callback.end()) return; - session_response_callback->second(response); + session_response_callback->second(response, request); /// Session closed, no more writes if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close) @@ -419,7 +420,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ { throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push request to queue within operation timeout"); } - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); return true; } @@ -543,7 +544,7 @@ void KeeperDispatcher::shutdown() /// Set session expired for all pending requests while (requests_queue && requests_queue->tryPop(request_for_session)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); auto response = request_for_session.request->makeResponse(); response->error = Coordination::Error::ZSESSIONEXPIRED; setResponse(request_for_session.session_id, response); @@ -670,7 +671,7 @@ void KeeperDispatcher::sessionCleanerTask() }; if (!requests_queue->push(std::move(request_info))) LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions"); - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); /// Remove session from registered sessions finishSession(dead_session); @@ -771,21 +772,27 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) { std::lock_guard lock(session_to_response_callback_mutex); - new_session_id_response_callback[request->internal_id] = [promise, internal_id = request->internal_id] (const Coordination::ZooKeeperResponsePtr & response) + new_session_id_response_callback[request->internal_id] + = [promise, internal_id = request->internal_id]( + const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr /*request*/) { if (response->getOpNum() != Coordination::OpNum::SessionID) - promise->set_exception(std::make_exception_ptr(Exception(ErrorCodes::LOGICAL_ERROR, - "Incorrect response of type {} instead of SessionID response", response->getOpNum()))); + promise->set_exception(std::make_exception_ptr(Exception( + ErrorCodes::LOGICAL_ERROR, "Incorrect response of type {} instead of SessionID response", response->getOpNum()))); auto session_id_response = dynamic_cast(*response); if (session_id_response.internal_id != internal_id) { - promise->set_exception(std::make_exception_ptr(Exception(ErrorCodes::LOGICAL_ERROR, - "Incorrect response with internal id {} instead of {}", session_id_response.internal_id, internal_id))); + promise->set_exception(std::make_exception_ptr(Exception( + ErrorCodes::LOGICAL_ERROR, + "Incorrect response with internal id {} instead of {}", + session_id_response.internal_id, + internal_id))); } if (response->error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException::fromMessage(response->error, "SessionID request failed with error"))); + promise->set_exception( + std::make_exception_ptr(zkutil::KeeperException::fromMessage(response->error, "SessionID request failed with error"))); promise->set_value(session_id_response.session_id); }; @@ -794,7 +801,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) /// Push new session request to queue if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms)) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push session id request to queue within session timeout"); - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); if (future.wait_for(std::chrono::milliseconds(session_timeout_ms)) != std::future_status::ready) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot receive session id within session timeout"); diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 2e0c73131d5..a487b886d98 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -20,7 +20,7 @@ namespace DB { -using ZooKeeperResponseCallback = std::function; +using ZooKeeperResponseCallback = std::function; /// Highlevel wrapper for ClickHouse Keeper. /// Process user requests via consensus and return responses. @@ -92,7 +92,7 @@ private: void clusterUpdateWithReconfigDisabledThread(); void clusterUpdateThread(); - void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response); + void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr request = nullptr); /// Add error responses for requests to responses queue. /// Clears requests. diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e7cae714ba6..3d3d862e1dd 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -407,7 +407,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session)) return nullptr; - auto try_push = [&](const KeeperStorage::ResponseForSession& response) + auto try_push = [&](const KeeperStorage::ResponseForSession & response) { if (!responses_queue.push(response)) { @@ -416,17 +416,6 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n "Failed to push response with session id {} to the queue, probably because of shutdown", response.session_id); } - - using namespace std::chrono; - uint64_t elapsed = duration_cast(system_clock::now().time_since_epoch()).count() - request_for_session->time; - if (elapsed > keeper_context->getCoordinationSettings()->log_slow_total_threshold_ms) - { - LOG_INFO( - log, - "Total time to process a request took too long ({}ms).\nRequest info: {}", - elapsed, - request_for_session->request->toString(/*short_format=*/true)); - } }; try @@ -443,6 +432,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n KeeperStorage::ResponseForSession response_for_session; response_for_session.session_id = -1; response_for_session.response = response; + response_for_session.request = request_for_session->request; LockGuardWithStats lock(storage_and_responses_lock); session_id = storage->getSessionID(session_id_request.session_timeout_ms); @@ -462,8 +452,14 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n LockGuardWithStats lock(storage_and_responses_lock); KeeperStorage::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); + for (auto & response_for_session : responses_for_sessions) + { + if (response_for_session.response->xid != Coordination::WATCH_XID) + response_for_session.request = request_for_session->request; + try_push(response_for_session); + } if (keeper_context->digestEnabled() && request_for_session->digest) assertDigest(*request_for_session->digest, storage->getNodesDigest(true), *request_for_session->request, request_for_session->log_idx, true); @@ -797,9 +793,14 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi LockGuardWithStats lock(storage_and_responses_lock); auto responses = storage->processRequest( request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/); - for (const auto & response : responses) - if (!responses_queue.push(response)) - LOG_WARNING(log, "Failed to push response with session id {} to the queue, probably because of shutdown", response.session_id); + + for (auto & response_for_session : responses) + { + if (response_for_session.response->xid != Coordination::WATCH_XID) + response_for_session.request = request_for_session.request; + if (!responses_queue.push(response_for_session)) + LOG_WARNING(log, "Failed to push response with session id {} to the queue, probably because of shutdown", response_for_session.session_id); + } } void KeeperStateMachine::shutdownStorage() diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index d5e9a64e69c..f7812ad8877 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -206,6 +206,7 @@ public: { int64_t session_id; Coordination::ZooKeeperResponsePtr response; + Coordination::ZooKeeperRequestPtr request = nullptr; }; using ResponsesForSessions = std::vector; diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index a034c50094d..5014564dbe0 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -27,7 +27,8 @@ PoolWithFailover::PoolWithFailover( size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, - bool auto_close_connection_) + bool auto_close_connection_, + size_t connection_attempt_timeout_) : pool_wait_timeout(pool_wait_timeout_) , max_tries(max_tries_) , auto_close_connection(auto_close_connection_) @@ -39,8 +40,13 @@ PoolWithFailover::PoolWithFailover( { for (const auto & replica_configuration : configurations) { - auto connection_info = formatConnectionString(replica_configuration.database, - replica_configuration.host, replica_configuration.port, replica_configuration.username, replica_configuration.password); + auto connection_info = formatConnectionString( + replica_configuration.database, + replica_configuration.host, + replica_configuration.port, + replica_configuration.username, + replica_configuration.password, + connection_attempt_timeout_); replicas_with_priority[priority].emplace_back(connection_info, pool_size); } } @@ -51,7 +57,8 @@ PoolWithFailover::PoolWithFailover( size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, - bool auto_close_connection_) + bool auto_close_connection_, + size_t connection_attempt_timeout_) : pool_wait_timeout(pool_wait_timeout_) , max_tries(max_tries_) , auto_close_connection(auto_close_connection_) @@ -63,7 +70,13 @@ PoolWithFailover::PoolWithFailover( for (const auto & [host, port] : configuration.addresses) { LOG_DEBUG(getLogger("PostgreSQLPoolWithFailover"), "Adding address host: {}, port: {} to connection pool", host, port); - auto connection_string = formatConnectionString(configuration.database, host, port, configuration.username, configuration.password); + auto connection_string = formatConnectionString( + configuration.database, + host, + port, + configuration.username, + configuration.password, + connection_attempt_timeout_); replicas_with_priority[0].emplace_back(connection_string, pool_size); } } diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index 3c538fc3dea..502a9a9b7d7 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -14,7 +14,6 @@ static constexpr inline auto POSTGRESQL_POOL_DEFAULT_SIZE = 16; static constexpr inline auto POSTGRESQL_POOL_WAIT_TIMEOUT = 5000; -static constexpr inline auto POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES = 2; namespace postgres { @@ -30,14 +29,16 @@ public: size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, - bool auto_close_connection_); + bool auto_close_connection_, + size_t connection_attempt_timeout_); explicit PoolWithFailover( const DB::StoragePostgreSQL::Configuration & configuration, size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, - bool auto_close_connection_); + bool auto_close_connection_, + size_t connection_attempt_timeout_); PoolWithFailover(const PoolWithFailover & other) = delete; diff --git a/src/Core/PostgreSQL/Utils.cpp b/src/Core/PostgreSQL/Utils.cpp index 810bf62fdab..9dc010c1c69 100644 --- a/src/Core/PostgreSQL/Utils.cpp +++ b/src/Core/PostgreSQL/Utils.cpp @@ -8,7 +8,7 @@ namespace postgres { -ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password) +ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password, UInt64 timeout) { DB::WriteBufferFromOwnString out; out << "dbname=" << DB::quote << dbname @@ -16,7 +16,7 @@ ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, S << " port=" << port << " user=" << DB::quote << user << " password=" << DB::quote << password - << " connect_timeout=2"; + << " connect_timeout=" << timeout; return {out.str(), host + ':' + DB::toString(port)}; } diff --git a/src/Core/PostgreSQL/Utils.h b/src/Core/PostgreSQL/Utils.h index f179ab14c89..f2b8f1ac084 100644 --- a/src/Core/PostgreSQL/Utils.h +++ b/src/Core/PostgreSQL/Utils.h @@ -18,7 +18,7 @@ namespace pqxx namespace postgres { -ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password); +ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password, UInt64 timeout); String getConnectionForLog(const String & host, UInt16 port); diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 4c0848c0706..2e5b91e9b1b 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -63,7 +63,7 @@ const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET "; /// Marker for SSH-keys-based authentication (passed as the user name) const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION "; -/// Market for JSON Web Token authentication +/// Marker for JSON Web Token authentication const char JWT_AUTHENTICAION_MARKER[] = " JWT AUTHENTICATION "; }; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index d473810bcb8..28b32a6e6a5 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -151,8 +151,10 @@ namespace DB M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ + M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ + M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \ M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 81d0aa0c51d..52fa28a4481 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -36,7 +36,7 @@ class IColumn; M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \ + M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \ M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ @@ -167,9 +167,6 @@ class IColumn; M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ \ - M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ - M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ - \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ M(Bool, alter_move_to_space_execute_async, false, "Execute ALTER TABLE MOVE ... TO [DISK|VOLUME] asynchronously", 0) \ @@ -205,21 +202,6 @@ class IColumn; M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \ \ M(NonZeroUInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled. Should be always greater than 0", 0) \ - M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ - M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ - M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ - M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ - M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \ - M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \ - \ - M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ - M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ - M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ - M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ - M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ - M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ - M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ - M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard.", 0) \ \ @@ -251,8 +233,6 @@ class IColumn; M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \ M(Bool, split_parts_ranges_into_intersecting_and_non_intersecting_final, true, "Split parts ranges into intersecting and non intersecting during FINAL optimization", 0) \ M(Bool, split_intersecting_parts_ranges_into_layers_final, true, "Split intersecting parts ranges into layers during FINAL optimization", 0) \ - M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ - M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ \ M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \ M(Bool, mysql_map_string_to_text_in_show_columns, true, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Has an effect only when the connection is made through the MySQL wire protocol.", 0) \ @@ -341,7 +321,6 @@ class IColumn; M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ - M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \ \ M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \ M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \ @@ -367,6 +346,7 @@ class IColumn; \ M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, "Ignore ON CLUSTER clause for replicated UDF management queries.", 0) \ M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, "Ignore ON CLUSTER clause for replicated access entities management queries.", 0) \ + M(Bool, ignore_on_cluster_for_replicated_named_collections_queries, false, "Ignore ON CLUSTER clause for replicated named collections management queries.", 0) \ /** Settings for testing hedged requests */ \ M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \ M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \ @@ -392,7 +372,6 @@ class IColumn; M(Bool, empty_result_for_aggregation_by_constant_keys_on_empty_set, true, "Return empty result when aggregating by constant keys on empty set.", 0) \ M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \ M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \ - M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ M(Bool, enable_deflate_qpl_codec, false, "Enable/disable the DEFLATE_QPL codec.", 0) \ M(Bool, enable_zstd_qat_codec, false, "Enable/disable the ZSTD_QAT codec.", 0) \ M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ @@ -402,8 +381,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ - M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ + \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ \ @@ -553,6 +531,7 @@ class IColumn; M(Bool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \ M(Bool, optimize_read_in_window_order, true, "Enable ORDER BY optimization in window clause for reading data in corresponding order in MergeTree tables.", 0) \ M(Bool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \ + M(Bool, read_in_order_use_buffering, true, "Use buffering before merging while reading in order of primary key. It increases the parallelism of query execution", 0) \ M(UInt64, aggregation_in_order_max_block_bytes, 50000000, "Maximal size of block in bytes accumulated during aggregation in order of primary key. Lower block size allows to parallelize more final merge stage of aggregation.", 0) \ M(UInt64, read_in_order_two_level_merge_threshold, 100, "Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key.", 0) \ M(Bool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \ @@ -583,7 +562,9 @@ class IColumn; M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ \ M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \ + M(UInt64, postgresql_connection_attempt_timeout, 2, "Connection timeout to PostgreSQL table engine and database engine in seconds.", 0) \ M(UInt64, postgresql_connection_pool_wait_timeout, 5000, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \ + M(UInt64, postgresql_connection_pool_retries, 2, "Connection pool push/pop retries number for PostgreSQL table engine and database engine.", 0) \ M(Bool, postgresql_connection_pool_auto_close_connection, false, "Close connection before returning connection to the pool.", 0) \ M(UInt64, glob_expansion_max_elements, 1000, "Maximum number of allowed addresses (For external storages, table functions, etc).", 0) \ M(UInt64, odbc_bridge_connection_pool_size, 16, "Connection pool size for each connection settings string in ODBC bridge.", 0) \ @@ -593,13 +574,6 @@ class IColumn; M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \ M(UInt64, distributed_replica_max_ignored_errors, 0, "Number of errors that will be ignored while choosing replicas", 0) \ \ - M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \ - M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ - M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ - M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \ - M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \ - M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ - M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ @@ -634,12 +608,11 @@ class IColumn; M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ - M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ - M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ @@ -656,8 +629,6 @@ class IColumn; M(Bool, cast_keep_nullable, false, "CAST operator keep Nullable for result data type", 0) \ M(Bool, cast_ipv4_ipv6_default_on_conversion_error, false, "CAST operator into IPv4, CAST operator into IPV6 type, toIPv4, toIPv6 functions will return default value instead of throwing exception on conversion error.", 0) \ M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. Currently works only for FREEZE and ATTACH commands.", 0) \ - M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ - M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ M(Bool, system_events_show_zero_values, false, "When querying system.events or system.metrics tables, include all metrics, even with zero values.", 0) \ M(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, "Defines how MySQL types are converted to corresponding ClickHouse types. A comma separated list in any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. decimal: convert NUMERIC and DECIMAL types to Decimal when precision allows it. datetime64: convert DATETIME and TIMESTAMP types to DateTime64 instead of DateTime when precision is not 0. date2Date32: convert DATE to Date32 instead of Date. Takes precedence over date2String. date2String: convert DATE to String instead of Date. Overridden by datetime64.", 0) \ M(Bool, optimize_trivial_insert_select, false, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \ @@ -710,18 +681,17 @@ class IColumn; M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \ \ M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \ - M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \ M(UInt64, max_size_to_preallocate_for_aggregation, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \ \ + M(Bool, collect_hash_table_stats_during_joins, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \ + M(UInt64, max_size_to_preallocate_for_joins, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before join", 0) \ + \ M(Bool, kafka_disable_num_consumers_limit, false, "Disable limit on kafka_num_consumers that depends on the number of available CPU cores", 0) \ M(Bool, enable_software_prefetch_in_aggregation, true, "Enable use of software prefetch in aggregation", 0) \ M(Bool, allow_aggregate_partitions_independently, false, "Enable independent aggregation of partitions on separate threads when partition key suits group by key. Beneficial when number of partitions close to number of cores and partitions have roughly the same size", 0) \ M(Bool, force_aggregate_partitions_independently, false, "Force the use of optimization when it is applicable, but heuristics decided not to use it", 0) \ M(UInt64, max_number_of_partitions_for_independent_aggregation, 128, "Maximal number of partitions in table to apply optimization", 0) \ M(Float, min_hit_rate_to_use_consecutive_keys_optimization, 0.5, "Minimal hit rate of a cache which is used for consecutive keys optimization in aggregation to keep it enabled", 0) \ - /** Experimental feature for moving data between shards. */ \ - \ - M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \ \ M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \ M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \ @@ -754,6 +724,7 @@ class IColumn; M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \ M(Bool, optimize_group_by_constant_keys, true, "Optimize GROUP BY when all keys in block are constant", 0) \ M(Bool, legacy_column_name_of_tuple_literal, false, "List all names of element of large tuple literals in their column names instead of hash. This settings exists only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher.", 0) \ + M(Bool, enable_named_columns_in_function_tuple, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers.", 0) \ \ M(Bool, query_plan_enable_optimizations, true, "Globally enable/disable query optimization at the query plan level", 0) \ M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \ @@ -761,6 +732,7 @@ class IColumn; M(Bool, query_plan_push_down_limit, true, "Allow to move LIMITs down in the query plan", 0) \ M(Bool, query_plan_split_filter, true, "Allow to split filters in the query plan", 0) \ M(Bool, query_plan_merge_expressions, true, "Allow to merge expressions in the query plan", 0) \ + M(Bool, query_plan_merge_filters, false, "Allow to merge filters in the query plan", 0) \ M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \ M(Bool, query_plan_convert_outer_join_to_inner_join, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values", 0) \ M(Bool, query_plan_optimize_prewhere, true, "Allow to push down filter to PREWHERE expression for supported storages", 0) \ @@ -902,34 +874,11 @@ class IColumn; M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \ M(Bool, enable_blob_storage_log, true, "Write information about blob storage operations to system.blob_storage_log table", 0) \ - \ - /** Experimental functions */ \ - M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ - M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ - M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ - M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ - M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ - M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ - M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ - M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ - M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ - M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ - M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \ - M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \ - M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ - M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \ - M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ - M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ - M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ - M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ - M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ - M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ - M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \ M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ - M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0)\ + M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ M(SQLSecurityType, default_materialized_view_sql_security, SQLSecurityType::DEFINER, "Allows to set a default value for SQL SECURITY option when creating a materialized view.", 0) \ M(String, default_view_definer, "CURRENT_USER", "Allows to set a default value for DEFINER option when creating view.", 0) \ @@ -939,6 +888,81 @@ class IColumn; M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ + M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ + M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ + M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ + \ + \ + /* ###################################### */ \ + /* ######## EXPERIMENTAL FEATURES ####### */ \ + /* ###################################### */ \ + M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ + M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ + M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ + M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ + M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ + M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ + M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ + M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ + M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ + M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \ + M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \ + M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ + M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \ + M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ + M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ + M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ + M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ + \ + M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ + M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ + \ + /* Parallel replicas */ \ + M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ + M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ + M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ + M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ + M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \ + M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \ + M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ + M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ + M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ + M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ + M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ + M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ + M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ + M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ + \ + M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ + M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ + \ + M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \ + \ + /* Analyzer: It's not experimental anymore (WIP) */ \ + M(Bool, allow_experimental_analyzer, true, "Allow new query analyzer.", IMPORTANT) \ + M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ + \ + M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \ + M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ + M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ + \ + M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \ + M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \ + M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ + M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ + \ + M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ + M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \ + \ + M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ + M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ + \ + /** Experimental feature for moving data between shards. */ \ + M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \ + + /** End of experimental features */ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. @@ -952,7 +976,6 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ - MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ @@ -992,6 +1015,7 @@ class IColumn; MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0) \ + MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_entries_for_hash_table_stats, 10'000) \ /* ---- */ \ MAKE_OBSOLETE(M, DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic) \ MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \ @@ -1046,6 +1070,7 @@ class IColumn; M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \ M(Bool, input_format_orc_filter_push_down, true, "When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.", 0) \ + M(Bool, input_format_orc_read_use_writer_time_zone, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT.", 0) \ M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \ M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \ M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \ @@ -1113,6 +1138,8 @@ class IColumn; M(Bool, input_format_tsv_crlf_end_of_line, false, "If it is set true, file function will read TSV format with \\r\\n instead of \\n.", 0) \ \ M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \ + M(Bool, input_format_native_decode_types_in_binary_format, false, "Read data types in binary format instead of type names in Native input format", 0) \ + M(Bool, output_format_native_encode_types_in_binary_format, false, "Write data types in binary format instead of type names in Native output format", 0) \ \ M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \ M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ @@ -1132,6 +1159,8 @@ class IColumn; M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \ M(UInt64, format_binary_max_string_size, 1_GiB, "The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit", 0) \ M(UInt64, format_binary_max_array_size, 1_GiB, "The maximum allowed size for Array in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit", 0) \ + M(Bool, input_format_binary_decode_types_in_binary_format, false, "Read data types in binary format instead of type names in RowBinaryWithNamesAndTypes input format", 0) \ + M(Bool, output_format_binary_encode_types_in_binary_format, false, "Write data types in binary format instead of type names in RowBinaryWithNamesAndTypes output format ", 0) \ M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ \ M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ @@ -1152,9 +1181,9 @@ class IColumn; M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ M(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, "Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query.", 0) \ M(UInt64Auto, output_format_pretty_color, "auto", "Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal.", 0) \ - M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ - M(UInt64, output_format_pretty_display_footer_column_names, true, "Display column names in the footer if there are 999 or more rows.", 0) \ - M(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, "Sets the minimum threshold value of rows for which to enable displaying column names in the footer. 50 (default)", 0) \ + M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ + M(UInt64, output_format_pretty_display_footer_column_names, true, "Display column names in the footer if there are 999 or more rows.", 0) \ + M(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, "Sets the minimum threshold value of rows for which to enable displaying column names in the footer. 50 (default)", 0) \ M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \ M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \ M(Bool, output_format_parquet_string_as_string, true, "Use Parquet String type instead of Binary for String columns.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 8c096c13634..b9b72209103 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -58,14 +58,27 @@ String ClickHouseVersion::toString() const static std::initializer_list> settings_changes_history_initializer = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, + {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, + {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, + {"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"}, + {"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"}, {"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"}, + {"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."}, {"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."}, {"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."}, + {"collect_hash_table_stats_during_joins", false, true, "New setting."}, + {"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."}, + {"input_format_orc_read_use_writer_time_zone", false, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT."}, {"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."}, {"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"}, + {"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"}, {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, + {"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."}, + {"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."}, + {"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."} }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, diff --git a/src/Core/tests/gtest_fields_binary_enciding.cpp b/src/Core/tests/gtest_fields_binary_enciding.cpp new file mode 100644 index 00000000000..087caf746bb --- /dev/null +++ b/src/Core/tests/gtest_fields_binary_enciding.cpp @@ -0,0 +1,65 @@ +#include +#include +#include +#include + +using namespace DB; + +namespace DB::ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + + +void check(const Field & field) +{ +// std::cerr << "Check " << toString(field) << "\n"; + WriteBufferFromOwnString ostr; + encodeField(field, ostr); + ReadBufferFromString istr(ostr.str()); + Field decoded_field = decodeField(istr); + ASSERT_TRUE(istr.eof()); + ASSERT_EQ(field, decoded_field); +} + +GTEST_TEST(FieldBinaryEncoding, EncodeAndDecode) +{ + check(Null()); + check(POSITIVE_INFINITY); + check(NEGATIVE_INFINITY); + check(true); + check(UInt64(42)); + check(Int64(-42)); + check(UInt128(42)); + check(Int128(-42)); + check(UInt256(42)); + check(Int256(-42)); + check(UUID(42)); + check(IPv4(42)); + check(IPv6(42)); + check(Float64(42.42)); + check(String("Hello, World!")); + check(Array({Field(UInt64(42)), Field(UInt64(43))})); + check(Tuple({Field(UInt64(42)), Field(Null()), Field(UUID(42)), Field(String("Hello, World!"))})); + check(Map({Tuple{Field(UInt64(42)), Field(String("str_42"))}, Tuple{Field(UInt64(43)), Field(String("str_43"))}})); + check(Object({{String("key_1"), Field(UInt64(42))}, {String("key_2"), Field(UInt64(43))}})); + check(DecimalField(4242, 3)); + check(DecimalField(4242, 3)); + check(DecimalField(Int128(4242), 3)); + check(DecimalField(Int256(4242), 3)); + check(AggregateFunctionStateData{.name="some_name", .data="some_data"}); + try + { + check(CustomType()); + } + catch (const Exception & e) + { + ASSERT_EQ(e.code(), ErrorCodes::UNSUPPORTED_METHOD); + } + + check(Array({ + Tuple({Field(UInt64(42)), Map({Tuple{Field(UInt64(42)), Field(String("str_42"))}, Tuple{Field(UInt64(43)), Field(String("str_43"))}}), Field(UUID(42)), Field(String("Hello, World!"))}), + Tuple({Field(UInt64(43)), Map({Tuple{Field(UInt64(43)), Field(String("str_43"))}, Tuple{Field(UInt64(44)), Field(String("str_44"))}}), Field(UUID(43)), Field(String("Hello, World 2!"))}) + })); +} + diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 8b4b3d6ee4c..52ed151107e 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -25,7 +25,6 @@ private: mutable std::optional version; String getNameImpl(bool with_version) const; - size_t getVersion() const; public: static constexpr bool is_parametric = true; @@ -39,6 +38,8 @@ public: { } + size_t getVersion() const; + String getFunctionName() const; AggregateFunctionPtr getFunction() const { return function; } diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index cae9622bcb9..d3b3adc4965 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -165,6 +165,19 @@ static std::pair create(const ASTPtr & argum return std::make_pair(storage_type, std::make_unique(std::move(custom_name), nullptr)); } +String DataTypeCustomSimpleAggregateFunction::getFunctionName() const +{ + return function->getName(); +} + +DataTypePtr createSimpleAggregateFunctionType(const AggregateFunctionPtr & function, const DataTypes & argument_types, const Array & parameters) +{ + auto custom_desc = std::make_unique( + std::make_unique(function, argument_types, parameters)); + + return DataTypeFactory::instance().getCustom(std::move(custom_desc)); +} + void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory) { factory.registerDataTypeCustom("SimpleAggregateFunction", create); diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h index bdabb465fe5..303da86979a 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h @@ -40,8 +40,13 @@ public: : function(function_), argument_types(argument_types_), parameters(parameters_) {} AggregateFunctionPtr getFunction() const { return function; } + String getFunctionName() const; + const DataTypes & getArgumentsDataTypes() const { return argument_types; } + const Array & getParameters() const { return parameters; } String getName() const override; static void checkSupportedFunctions(const AggregateFunctionPtr & function); }; +DataTypePtr createSimpleAggregateFunctionType(const AggregateFunctionPtr & function, const DataTypes & argument_types, const Array & parameters); + } diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index c920e69c13b..a1b1f8325f0 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -2,9 +2,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -69,7 +71,7 @@ static DataTypePtr create(const ASTPtr & arguments) auto * literal = argument->arguments->children[1]->as(); - if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > 255) + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > ColumnVariant::MAX_NESTED_COLUMNS) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); return std::make_shared(literal->value.get()); @@ -110,28 +112,58 @@ std::unique_ptr DataTypeDynamic::getDynamicSubcolumnDa } /// Extract nested subcolumn of requested dynamic subcolumn if needed. - if (!subcolumn_nested_name.empty()) + /// If requested subcolumn is null map, it's processed separately as there is no Nullable type yet. + bool is_null_map_subcolumn = subcolumn_nested_name == "null"; + if (is_null_map_subcolumn) + { + res->type = std::make_shared(); + } + else if (!subcolumn_nested_name.empty()) { res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null); if (!res) return nullptr; } - res->serialization = std::make_shared(res->serialization, subcolumn_type->getName()); - res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + res->serialization = std::make_shared(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn); + /// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()). + bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality(); + if (!is_null_map_subcolumn && make_subcolumn_nullable) + res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + if (data.column) { if (discriminator) { - /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to + /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to /// create full subcolumn from variant according to discriminators. const auto & variant_column = assert_cast(*data.column).getVariantColumn(); - auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator)); - res->column = creator.create(res->column); + std::unique_ptr creator; + if (is_null_map_subcolumn) + creator = std::make_unique( + variant_column.getLocalDiscriminatorsPtr(), + "", + *discriminator, + variant_column.localDiscriminatorByGlobal(*discriminator)); + else + creator = std::make_unique( + variant_column.getLocalDiscriminatorsPtr(), + "", + *discriminator, + variant_column.localDiscriminatorByGlobal(*discriminator), + make_subcolumn_nullable); + res->column = creator->create(res->column); + } + /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. + else if (is_null_map_subcolumn) + { + /// Fill null map with 1 when there is no such Dynamic subcolumn. + auto column = ColumnUInt8::create(); + assert_cast(*column).getData().resize_fill(data.column->size(), 1); + res->column = std::move(column); } else { - /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. auto column = res->type->createColumn(); column->insertManyDefaults(data.column->size()); res->column = std::move(column); diff --git a/src/DataTypes/DataTypeNested.h b/src/DataTypes/DataTypeNested.h index 1ad06477a6e..102e6c293cc 100644 --- a/src/DataTypes/DataTypeNested.h +++ b/src/DataTypes/DataTypeNested.h @@ -19,6 +19,8 @@ public: } String getName() const override; + const DataTypes & getElements() const { return elems; } + const Names & getNames() const { return names; } }; DataTypePtr createNested(const DataTypes & types, const Names & names); diff --git a/src/DataTypes/DataTypesBinaryEncoding.cpp b/src/DataTypes/DataTypesBinaryEncoding.cpp new file mode 100644 index 00000000000..bd994e313ba --- /dev/null +++ b/src/DataTypes/DataTypesBinaryEncoding.cpp @@ -0,0 +1,705 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int INCORRECT_DATA; +} + +namespace +{ + +enum class BinaryTypeIndex : uint8_t +{ + Nothing = 0x00, + UInt8 = 0x01, + UInt16 = 0x02, + UInt32 = 0x03, + UInt64 = 0x04, + UInt128 = 0x05, + UInt256 = 0x06, + Int8 = 0x07, + Int16 = 0x08, + Int32 = 0x09, + Int64 = 0x0A, + Int128 = 0x0B, + Int256 = 0x0C, + Float32 = 0x0D, + Float64 = 0x0E, + Date = 0x0F, + Date32 = 0x10, + DateTimeUTC = 0x11, + DateTimeWithTimezone = 0x12, + DateTime64UTC = 0x13, + DateTime64WithTimezone = 0x14, + String = 0x15, + FixedString = 0x16, + Enum8 = 0x17, + Enum16 = 0x18, + Decimal32 = 0x19, + Decimal64 = 0x1A, + Decimal128 = 0x1B, + Decimal256 = 0x1C, + UUID = 0x1D, + Array = 0x1E, + UnnamedTuple = 0x1F, + NamedTuple = 0x20, + Set = 0x21, + Interval = 0x22, + Nullable = 0x23, + Function = 0x24, + AggregateFunction = 0x25, + LowCardinality = 0x26, + Map = 0x27, + IPv4 = 0x28, + IPv6 = 0x29, + Variant = 0x2A, + Dynamic = 0x2B, + Custom = 0x2C, + Bool = 0x2D, + SimpleAggregateFunction = 0x2E, + Nested = 0x2F, +}; + +BinaryTypeIndex getBinaryTypeIndex(const DataTypePtr & type) +{ + /// By default custom types don't have their own BinaryTypeIndex. + if (type->hasCustomName()) + { + /// Some widely used custom types have separate BinaryTypeIndex for better serialization. + /// Right now it's Bool, SimpleAggregateFunction and Nested types. + /// TODO: Consider adding BinaryTypeIndex for more custom types. + + if (isBool(type)) + return BinaryTypeIndex::Bool; + + if (typeid_cast(type->getCustomName())) + return BinaryTypeIndex::SimpleAggregateFunction; + + if (isNested(type)) + return BinaryTypeIndex::Nested; + + return BinaryTypeIndex::Custom; + } + + switch (type->getTypeId()) + { + case TypeIndex::Nothing: + return BinaryTypeIndex::Nothing; + case TypeIndex::UInt8: + return BinaryTypeIndex::UInt8; + case TypeIndex::UInt16: + return BinaryTypeIndex::UInt16; + case TypeIndex::UInt32: + return BinaryTypeIndex::UInt32; + case TypeIndex::UInt64: + return BinaryTypeIndex::UInt64; + case TypeIndex::UInt128: + return BinaryTypeIndex::UInt128; + case TypeIndex::UInt256: + return BinaryTypeIndex::UInt256; + case TypeIndex::Int8: + return BinaryTypeIndex::Int8; + case TypeIndex::Int16: + return BinaryTypeIndex::Int16; + case TypeIndex::Int32: + return BinaryTypeIndex::Int32; + case TypeIndex::Int64: + return BinaryTypeIndex::Int64; + case TypeIndex::Int128: + return BinaryTypeIndex::Int128; + case TypeIndex::Int256: + return BinaryTypeIndex::Int256; + case TypeIndex::Float32: + return BinaryTypeIndex::Float32; + case TypeIndex::Float64: + return BinaryTypeIndex::Float64; + case TypeIndex::Date: + return BinaryTypeIndex::Date; + case TypeIndex::Date32: + return BinaryTypeIndex::Date32; + case TypeIndex::DateTime: + if (assert_cast(*type).hasExplicitTimeZone()) + return BinaryTypeIndex::DateTimeWithTimezone; + return BinaryTypeIndex::DateTimeUTC; + case TypeIndex::DateTime64: + if (assert_cast(*type).hasExplicitTimeZone()) + return BinaryTypeIndex::DateTime64WithTimezone; + return BinaryTypeIndex::DateTime64UTC; + case TypeIndex::String: + return BinaryTypeIndex::String; + case TypeIndex::FixedString: + return BinaryTypeIndex::FixedString; + case TypeIndex::Enum8: + return BinaryTypeIndex::Enum8; + case TypeIndex::Enum16: + return BinaryTypeIndex::Enum16; + case TypeIndex::Decimal32: + return BinaryTypeIndex::Decimal32; + case TypeIndex::Decimal64: + return BinaryTypeIndex::Decimal64; + case TypeIndex::Decimal128: + return BinaryTypeIndex::Decimal128; + case TypeIndex::Decimal256: + return BinaryTypeIndex::Decimal256; + case TypeIndex::UUID: + return BinaryTypeIndex::UUID; + case TypeIndex::Array: + return BinaryTypeIndex::Array; + case TypeIndex::Tuple: + { + const auto & tuple_type = assert_cast(*type); + if (tuple_type.haveExplicitNames()) + return BinaryTypeIndex::NamedTuple; + return BinaryTypeIndex::UnnamedTuple; + } + case TypeIndex::Set: + return BinaryTypeIndex::Set; + case TypeIndex::Interval: + return BinaryTypeIndex::Interval; + case TypeIndex::Nullable: + return BinaryTypeIndex::Nullable; + case TypeIndex::Function: + return BinaryTypeIndex::Function; + case TypeIndex::AggregateFunction: + return BinaryTypeIndex::AggregateFunction; + case TypeIndex::LowCardinality: + return BinaryTypeIndex::LowCardinality; + case TypeIndex::Map: + return BinaryTypeIndex::Map; + case TypeIndex::Object: + /// Object type will be deprecated and replaced by new implementation. No need to support it here. + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of type Object is not supported"); + case TypeIndex::IPv4: + return BinaryTypeIndex::IPv4; + case TypeIndex::IPv6: + return BinaryTypeIndex::IPv6; + case TypeIndex::Variant: + return BinaryTypeIndex::Variant; + case TypeIndex::Dynamic: + return BinaryTypeIndex::Dynamic; + /// JSONPaths is used only during schema inference and cannot be used anywhere else. + case TypeIndex::JSONPaths: + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of type JSONPaths is not supported"); + } +} + +template +void encodeEnumValues(const DataTypePtr & type, WriteBuffer & buf) +{ + const auto & enum_type = assert_cast &>(*type); + const auto & values = enum_type.getValues(); + writeVarUInt(values.size(), buf); + for (const auto & [name, value] : values) + { + writeStringBinary(name, buf); + writeBinaryLittleEndian(value, buf); + } +} + +template +DataTypePtr decodeEnum(ReadBuffer & buf) +{ + typename DataTypeEnum::Values values; + size_t size; + readVarUInt(size, buf); + for (size_t i = 0; i != size; ++i) + { + String name; + readStringBinary(name, buf); + T value; + readBinaryLittleEndian(value, buf); + values.emplace_back(name, value); + } + + return std::make_shared>(values); +} + +template +void encodeDecimal(const DataTypePtr & type, WriteBuffer & buf) +{ + const auto & decimal_type = assert_cast &>(*type); + /// Both precision and scale should be less than 76, so we can decode it in 1 byte. + writeBinary(UInt8(decimal_type.getPrecision()), buf); + writeBinary(UInt8(decimal_type.getScale()), buf); +} + +template +DataTypePtr decodeDecimal(ReadBuffer & buf) +{ + UInt8 precision; + readBinary(precision, buf); + UInt8 scale; + readBinary(scale, buf); + return std::make_shared>(precision, scale); +} + +void encodeAggregateFunction(const String & function_name, const Array & parameters, const DataTypes & arguments_types, WriteBuffer & buf) +{ + writeStringBinary(function_name, buf); + writeVarUInt(parameters.size(), buf); + for (const auto & param : parameters) + encodeField(param, buf); + writeVarUInt(arguments_types.size(), buf); + for (const auto & argument_type : arguments_types) + encodeDataType(argument_type, buf); +} + +std::tuple decodeAggregateFunction(ReadBuffer & buf) +{ + String function_name; + readStringBinary(function_name, buf); + size_t num_parameters; + readVarUInt(num_parameters, buf); + Array parameters; + parameters.reserve(num_parameters); + for (size_t i = 0; i != num_parameters; ++i) + parameters.push_back(decodeField(buf)); + size_t num_arguments; + readVarUInt(num_arguments, buf); + DataTypes arguments_types; + arguments_types.reserve(num_arguments); + for (size_t i = 0; i != num_arguments; ++i) + arguments_types.push_back(decodeDataType(buf)); + AggregateFunctionProperties properties; + auto action = NullsAction::EMPTY; + auto function = AggregateFunctionFactory::instance().get(function_name, action, arguments_types, parameters, properties); + return {function, parameters, arguments_types}; +} + +} + +void encodeDataType(const DataTypePtr & type, WriteBuffer & buf) +{ + /// First, write the BinaryTypeIndex byte. + auto binary_type_index = getBinaryTypeIndex(type); + buf.write(UInt8(binary_type_index)); + /// Then, write additional information depending on the data type. + switch (binary_type_index) + { + case BinaryTypeIndex::DateTimeWithTimezone: + { + const auto & datetime_type = assert_cast(*type); + writeStringBinary(datetime_type.getTimeZone().getTimeZone(), buf); + break; + } + case BinaryTypeIndex::DateTime64UTC: + { + const auto & datetime64_type = assert_cast(*type); + /// Maximum scale for DateTime64 is 9, so we can write it as 1 byte. + buf.write(UInt8(datetime64_type.getScale())); + break; + } + case BinaryTypeIndex::DateTime64WithTimezone: + { + const auto & datetime64_type = assert_cast(*type); + buf.write(UInt8(datetime64_type.getScale())); + writeStringBinary(datetime64_type.getTimeZone().getTimeZone(), buf); + break; + } + case BinaryTypeIndex::FixedString: + { + const auto & fixed_string_type = assert_cast(*type); + writeVarUInt(fixed_string_type.getN(), buf); + break; + } + case BinaryTypeIndex::Enum8: + { + encodeEnumValues(type, buf); + break; + } + case BinaryTypeIndex::Enum16: + { + encodeEnumValues(type, buf); + break; + } + case BinaryTypeIndex::Decimal32: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Decimal64: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Decimal128: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Decimal256: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Array: + { + const auto & array_type = assert_cast(*type); + encodeDataType(array_type.getNestedType(), buf); + break; + } + case BinaryTypeIndex::NamedTuple: + { + const auto & tuple_type = assert_cast(*type); + const auto & types = tuple_type.getElements(); + const auto & names = tuple_type.getElementNames(); + writeVarUInt(types.size(), buf); + for (size_t i = 0; i != types.size(); ++i) + { + writeStringBinary(names[i], buf); + encodeDataType(types[i], buf); + } + break; + } + case BinaryTypeIndex::UnnamedTuple: + { + const auto & tuple_type = assert_cast(*type); + const auto & element_types = tuple_type.getElements(); + writeVarUInt(element_types.size(), buf); + for (const auto & element_type : element_types) + encodeDataType(element_type, buf); + break; + } + case BinaryTypeIndex::Interval: + { + const auto & interval_type = assert_cast(*type); + writeBinary(UInt8(interval_type.getKind().kind), buf); + break; + } + case BinaryTypeIndex::Nullable: + { + const auto & nullable_type = assert_cast(*type); + encodeDataType(nullable_type.getNestedType(), buf); + break; + } + case BinaryTypeIndex::Function: + { + const auto & function_type = assert_cast(*type); + const auto & arguments_types = function_type.getArgumentTypes(); + const auto & return_type = function_type.getReturnType(); + writeVarUInt(arguments_types.size(), buf); + for (const auto & argument_type : arguments_types) + encodeDataType(argument_type, buf); + encodeDataType(return_type, buf); + break; + } + case BinaryTypeIndex::LowCardinality: + { + const auto & low_cardinality_type = assert_cast(*type); + encodeDataType(low_cardinality_type.getDictionaryType(), buf); + break; + } + case BinaryTypeIndex::Map: + { + const auto & map_type = assert_cast(*type); + encodeDataType(map_type.getKeyType(), buf); + encodeDataType(map_type.getValueType(), buf); + break; + } + case BinaryTypeIndex::Variant: + { + const auto & variant_type = assert_cast(*type); + const auto & variants = variant_type.getVariants(); + writeVarUInt(variants.size(), buf); + for (const auto & variant : variants) + encodeDataType(variant, buf); + break; + } + case BinaryTypeIndex::Dynamic: + { + const auto & dynamic_type = assert_cast(*type); + /// Maximum number of dynamic types is 255, we can write it as 1 byte. + writeBinary(UInt8(dynamic_type.getMaxDynamicTypes()), buf); + break; + } + case BinaryTypeIndex::AggregateFunction: + { + const auto & aggregate_function_type = assert_cast(*type); + writeVarUInt(aggregate_function_type.getVersion(), buf); + encodeAggregateFunction(aggregate_function_type.getFunctionName(), aggregate_function_type.getParameters(), aggregate_function_type.getArgumentsDataTypes(), buf); + break; + } + case BinaryTypeIndex::SimpleAggregateFunction: + { + const auto & simple_aggregate_function_type = assert_cast(*type->getCustomName()); + encodeAggregateFunction(simple_aggregate_function_type.getFunctionName(), simple_aggregate_function_type.getParameters(), simple_aggregate_function_type.getArgumentsDataTypes(), buf); + break; + } + case BinaryTypeIndex::Nested: + { + const auto & nested_type = assert_cast(*type->getCustomName()); + const auto & elements = nested_type.getElements(); + const auto & names = nested_type.getNames(); + writeVarUInt(elements.size(), buf); + for (size_t i = 0; i != elements.size(); ++i) + { + writeStringBinary(names[i], buf); + encodeDataType(elements[i], buf); + } + break; + } + case BinaryTypeIndex::Custom: + { + const auto & type_name = type->getName(); + writeStringBinary(type_name, buf); + break; + } + default: + break; + } +} + +String encodeDataType(const DataTypePtr & type) +{ + WriteBufferFromOwnString buf; + encodeDataType(type, buf); + return buf.str(); +} + +DataTypePtr decodeDataType(ReadBuffer & buf) +{ + UInt8 type; + readBinary(type, buf); + switch (BinaryTypeIndex(type)) + { + case BinaryTypeIndex::Nothing: + return std::make_shared(); + case BinaryTypeIndex::UInt8: + return std::make_shared(); + case BinaryTypeIndex::Bool: + return DataTypeFactory::instance().get("Bool"); + case BinaryTypeIndex::UInt16: + return std::make_shared(); + case BinaryTypeIndex::UInt32: + return std::make_shared(); + case BinaryTypeIndex::UInt64: + return std::make_shared(); + case BinaryTypeIndex::UInt128: + return std::make_shared(); + case BinaryTypeIndex::UInt256: + return std::make_shared(); + case BinaryTypeIndex::Int8: + return std::make_shared(); + case BinaryTypeIndex::Int16: + return std::make_shared(); + case BinaryTypeIndex::Int32: + return std::make_shared(); + case BinaryTypeIndex::Int64: + return std::make_shared(); + case BinaryTypeIndex::Int128: + return std::make_shared(); + case BinaryTypeIndex::Int256: + return std::make_shared(); + case BinaryTypeIndex::Float32: + return std::make_shared(); + case BinaryTypeIndex::Float64: + return std::make_shared(); + case BinaryTypeIndex::Date: + return std::make_shared(); + case BinaryTypeIndex::Date32: + return std::make_shared(); + case BinaryTypeIndex::DateTimeUTC: + return std::make_shared(); + case BinaryTypeIndex::DateTimeWithTimezone: + { + String time_zone; + readStringBinary(time_zone, buf); + return std::make_shared(time_zone); + } + case BinaryTypeIndex::DateTime64UTC: + { + UInt8 scale; + readBinary(scale, buf); + return std::make_shared(scale); + } + case BinaryTypeIndex::DateTime64WithTimezone: + { + UInt8 scale; + readBinary(scale, buf); + String time_zone; + readStringBinary(time_zone, buf); + return std::make_shared(scale, time_zone); + } + case BinaryTypeIndex::String: + return std::make_shared(); + case BinaryTypeIndex::FixedString: + { + UInt64 size; + readVarUInt(size, buf); + return std::make_shared(size); + } + case BinaryTypeIndex::Enum8: + return decodeEnum(buf); + case BinaryTypeIndex::Enum16: + return decodeEnum(buf); + case BinaryTypeIndex::Decimal32: + return decodeDecimal(buf); + case BinaryTypeIndex::Decimal64: + return decodeDecimal(buf); + case BinaryTypeIndex::Decimal128: + return decodeDecimal(buf); + case BinaryTypeIndex::Decimal256: + return decodeDecimal(buf); + case BinaryTypeIndex::UUID: + return std::make_shared(); + case BinaryTypeIndex::Array: + return std::make_shared(decodeDataType(buf)); + case BinaryTypeIndex::NamedTuple: + { + size_t size; + readVarUInt(size, buf); + DataTypes elements; + elements.reserve(size); + Names names; + names.reserve(size); + for (size_t i = 0; i != size; ++i) + { + names.emplace_back(); + readStringBinary(names.back(), buf); + elements.push_back(decodeDataType(buf)); + } + + return std::make_shared(elements, names); + } + case BinaryTypeIndex::UnnamedTuple: + { + size_t size; + readVarUInt(size, buf); + DataTypes elements; + elements.reserve(size); + for (size_t i = 0; i != size; ++i) + elements.push_back(decodeDataType(buf)); + return std::make_shared(elements); + } + case BinaryTypeIndex::Set: + return std::make_shared(); + case BinaryTypeIndex::Interval: + { + UInt8 kind; + readBinary(kind, buf); + return std::make_shared(IntervalKind(IntervalKind::Kind(kind))); + } + case BinaryTypeIndex::Nullable: + return std::make_shared(decodeDataType(buf)); + case BinaryTypeIndex::Function: + { + size_t arguments_size; + readVarUInt(arguments_size, buf); + DataTypes arguments; + arguments.reserve(arguments_size); + for (size_t i = 0; i != arguments_size; ++i) + arguments.push_back(decodeDataType(buf)); + auto return_type = decodeDataType(buf); + return std::make_shared(arguments, return_type); + } + case BinaryTypeIndex::LowCardinality: + return std::make_shared(decodeDataType(buf)); + case BinaryTypeIndex::Map: + { + auto key_type = decodeDataType(buf); + auto value_type = decodeDataType(buf); + return std::make_shared(key_type, value_type); + } + case BinaryTypeIndex::IPv4: + return std::make_shared(); + case BinaryTypeIndex::IPv6: + return std::make_shared(); + case BinaryTypeIndex::Variant: + { + size_t size; + readVarUInt(size, buf); + DataTypes variants; + variants.reserve(size); + for (size_t i = 0; i != size; ++i) + variants.push_back(decodeDataType(buf)); + return std::make_shared(variants); + } + case BinaryTypeIndex::Dynamic: + { + UInt8 max_dynamic_types; + readBinary(max_dynamic_types, buf); + return std::make_shared(max_dynamic_types); + } + case BinaryTypeIndex::AggregateFunction: + { + size_t version; + readVarUInt(version, buf); + const auto & [function, parameters, arguments_types] = decodeAggregateFunction(buf); + return std::make_shared(function, arguments_types, parameters, version); + } + case BinaryTypeIndex::SimpleAggregateFunction: + { + const auto & [function, parameters, arguments_types] = decodeAggregateFunction(buf); + return createSimpleAggregateFunctionType(function, arguments_types, parameters); + } + case BinaryTypeIndex::Nested: + { + size_t size; + readVarUInt(size, buf); + Names names; + names.reserve(size); + DataTypes elements; + elements.reserve(size); + for (size_t i = 0; i != size; ++i) + { + names.emplace_back(); + readStringBinary(names.back(), buf); + elements.push_back(decodeDataType(buf)); + } + + return createNested(elements, names); + } + case BinaryTypeIndex::Custom: + { + String type_name; + readStringBinary(type_name, buf); + return DataTypeFactory::instance().get(type_name); + } + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown type code: {0:#04x}", UInt64(type)); +} + +DataTypePtr decodeDataType(const String & data) +{ + ReadBufferFromString buf(data); + return decodeDataType(buf); +} + +} diff --git a/src/DataTypes/DataTypesBinaryEncoding.h b/src/DataTypes/DataTypesBinaryEncoding.h new file mode 100644 index 00000000000..d02e7f85942 --- /dev/null +++ b/src/DataTypes/DataTypesBinaryEncoding.h @@ -0,0 +1,118 @@ +#pragma once + +#include + +namespace DB +{ + +/** + +Binary encoding for ClickHouse data types: +|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ClickHouse data type | Binary encoding | +|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Nothing | 0x00 | +| UInt8 | 0x01 | +| UInt16 | 0x02 | +| UInt32 | 0x03 | +| UInt64 | 0x04 | +| UInt128 | 0x05 | +| UInt256 | 0x06 | +| Int8 | 0x07 | +| Int16 | 0x08 | +| Int32 | 0x09 | +| Int64 | 0x0A | +| Int128 | 0x0B | +| Int256 | 0x0C | +| Float32 | 0x0D | +| Float64 | 0x0E | +| Date | 0x0F | +| Date32 | 0x10 | +| DateTime | 0x11 | +| DateTime(time_zone) | 0x12 | +| DateTime64(P) | 0x13 | +| DateTime64(P, time_zone) | 0x14 | +| String | 0x15 | +| FixedString(N) | 0x16 | +| Enum8 | 0x17... | +| Enum16 | 0x18...> | +| Decimal32(P, S) | 0x19 | +| Decimal64(P, S) | 0x1A | +| Decimal128(P, S) | 0x1B | +| Decimal256(P, S) | 0x1C | +| UUID | 0x1D | +| Array(T) | 0x1E | +| Tuple(T1, ..., TN) | 0x1F... | +| Tuple(name1 T1, ..., nameN TN) | 0x20... | +| Set | 0x21 | +| Interval | 0x22 | +| Nullable(T) | 0x23 | +| Function | 0x24... | +| AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x25...... | +| LowCardinality(T) | 0x26 | +| Map(K, V) | 0x27 | +| IPv4 | 0x28 | +| IPv6 | 0x29 | +| Variant(T1, ..., TN) | 0x2A... | +| Dynamic(max_types=N) | 0x2B | +| Custom type (Ring, Polygon, etc) | 0x2C | +| Bool | 0x2D | +| SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x2E...... | +| Nested(name1 T1, ..., nameN TN) | 0x2F... | +|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + +Interval kind binary encoding: +|---------------|-----------------| +| Interval kind | Binary encoding | +|---------------|-----------------| +| Nanosecond | 0x00 | +| Microsecond | 0x01 | +| Millisecond | 0x02 | +| Second | 0x03 | +| Minute | 0x04 | +| Hour | 0x05 | +| Day | 0x06 | +| Week | 0x07 | +| Month | 0x08 | +| Quarter | 0x09 | +| Year | 0x1A | +|---------------|-----------------| + +Aggregate function parameter binary encoding (binary encoding of a Field, see src/Common/FieldBinaryEncoding.h): +|------------------------|------------------------------------------------------------------------------------------------------------------------------| +| Parameter type | Binary encoding | +|------------------------|------------------------------------------------------------------------------------------------------------------------------| +| Null | 0x00 | +| UInt64 | 0x01 | +| Int64 | 0x02 | +| UInt128 | 0x03 | +| Int128 | 0x04 | +| UInt128 | 0x05 | +| Int128 | 0x06 | +| Float64 | 0x07 | +| Decimal32 | 0x08 | +| Decimal64 | 0x09 | +| Decimal128 | 0x0A | +| Decimal256 | 0x0B | +| String | 0x0C | +| Array | 0x0D... | +| Tuple | 0x0E... | +| Map | 0x0F... | +| IPv4 | 0x10 | +| IPv6 | 0x11 | +| UUID | 0x12 | +| Bool | 0x13 | +| Object | 0x14... | +| AggregateFunctionState | 0x15 | +| Negative infinity | 0xFE | +| Positive infinity | 0xFF | +|------------------------|------------------------------------------------------------------------------------------------------------------------------| +*/ + +String encodeDataType(const DataTypePtr & type); +void encodeDataType(const DataTypePtr & type, WriteBuffer & buf); + +DataTypePtr decodeDataType(const String & data); +DataTypePtr decodeDataType(ReadBuffer & buf); + +} diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 1c9715bbf53..1cb64b65d3a 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -173,7 +173,7 @@ bool IDataType::hasDynamicSubcolumns() const auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data) { - has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData(); + has_dynamic_subcolumns |= subcolumn_data.type && subcolumn_data.type->hasDynamicSubcolumnsData(); }; forEachSubcolumn(callback, data); return has_dynamic_subcolumns; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index bbb1d1a6cd1..7642a6619b3 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -64,6 +64,9 @@ String ISerialization::Substream::toString() const if (type == VariantElement) return fmt::format("VariantElement({})", variant_element_name); + if (type == VariantElementNullMap) + return fmt::format("VariantElementNullMap({}.null)", variant_element_name); + return String(magic_enum::enum_name(type)); } @@ -195,6 +198,8 @@ String getNameForSubstreamPath( stream_name += ".variant_offsets"; else if (it->type == Substream::VariantElement) stream_name += "." + it->variant_element_name; + else if (it->type == Substream::VariantElementNullMap) + stream_name += "." + it->variant_element_name + ".null"; else if (it->type == SubstreamType::DynamicStructure) stream_name += ".dynamic_structure"; } @@ -395,7 +400,8 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref return path[last_elem].type == Substream::NullMap || path[last_elem].type == Substream::TupleElement || path[last_elem].type == Substream::ArraySizes - || path[last_elem].type == Substream::VariantElement; + || path[last_elem].type == Substream::VariantElement + || path[last_elem].type == Substream::VariantElementNullMap; } ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 914ff9cf4a2..255dbbfadd2 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -184,6 +184,7 @@ public: VariantOffsets, VariantElements, VariantElement, + VariantElementNullMap, DynamicData, DynamicStructure, @@ -256,6 +257,11 @@ public: bool position_independent_encoding = true; + /// True if data type names should be serialized in binary encoding. + bool data_types_binary_encoding = false; + + bool use_compact_variant_discriminators_serialization = false; + enum class DynamicStatisticsMode { NONE, /// Don't write statistics. @@ -275,6 +281,9 @@ public: bool position_independent_encoding = true; + /// True if data type names should be deserialized in binary encoding. + bool data_types_binary_encoding = false; + bool native_format = false; /// If not zero, may be used to avoid reallocations while reading column of String type. @@ -434,6 +443,9 @@ protected: template State * checkAndGetState(const StatePtr & state) const; + template + static State * checkAndGetState(const StatePtr & state, const ISerialization * serialization); + [[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const; }; @@ -444,10 +456,16 @@ using SubstreamType = ISerialization::Substream::Type; template State * ISerialization::checkAndGetState(const StatePtr & state) const +{ + return checkAndGetState(state, this); +} + +template +State * ISerialization::checkAndGetState(const StatePtr & state, const ISerialization * serialization) { if (!state) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Got empty state for {}", demangle(typeid(*this).name())); + "Got empty state for {}", demangle(typeid(*serialization).name())); auto * state_concrete = typeid_cast(state.get()); if (!state_concrete) @@ -455,7 +473,7 @@ State * ISerialization::checkAndGetState(const StatePtr & state) const auto & state_ref = *state; throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid State for {}. Expected: {}, got {}", - demangle(typeid(*this).name()), + demangle(typeid(*serialization).name()), demangle(typeid(State).name()), demangle(typeid(state_ref).name())); } diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index ac7b8f4d084..b7d43332085 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -42,13 +42,13 @@ void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr, con { size_t size; readVarUInt(size, istr); - if (settings.max_binary_array_size && size > settings.max_binary_array_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_array_size", size, - settings.max_binary_array_size); + settings.binary.max_binary_string_size); field = Array(); Array & arr = field.get(); @@ -82,13 +82,13 @@ void SerializationArray::deserializeBinary(IColumn & column, ReadBuffer & istr, size_t size; readVarUInt(size, istr); - if (settings.max_binary_array_size && size > settings.max_binary_array_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_array_size", size, - settings.max_binary_array_size); + settings.binary.max_binary_string_size); IColumn & nested_column = column_array.getData(); diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 6351ff0ca0b..7609ffc91ca 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -109,7 +111,10 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( const auto & variant_column = column_dynamic.getVariantColumn(); /// Write internal Variant type name. - writeStringBinary(dynamic_state->variant_type->getName(), *stream); + if (settings.data_types_binary_encoding) + encodeDataType(dynamic_state->variant_type, *stream); + else + writeStringBinary(dynamic_state->variant_type->getName(), *stream); /// Write statistics in prefix if needed. if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX) @@ -178,9 +183,16 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD readBinaryLittleEndian(structure_version, *structure_stream); auto structure_state = std::make_shared(structure_version); /// Read internal Variant type name. - String data_type_name; - readStringBinary(data_type_name, *structure_stream); - structure_state->variant_type = DataTypeFactory::instance().get(data_type_name); + if (settings.data_types_binary_encoding) + { + structure_state->variant_type = decodeDataType(*structure_stream); + } + else + { + String data_type_name; + readStringBinary(data_type_name, *structure_stream); + structure_state->variant_type = DataTypeFactory::instance().get(data_type_name); + } const auto * variant_type = typeid_cast(structure_state->variant_type.get()); if (!variant_type) throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type of Dynamic nested column, expected Variant, got {}", structure_state->variant_type->getName()); @@ -280,33 +292,27 @@ void SerializationDynamic::deserializeBinaryBulkWithMultipleStreams( void SerializationDynamic::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - UInt8 null_bit = field.isNull(); - writeBinary(null_bit, ostr); - if (null_bit) + /// Serialize NULL as Nothing type with no value. + if (field.isNull()) + { + encodeDataType(std::make_shared(), ostr); return; + } auto field_type = applyVisitor(FieldToDataType(), field); - auto field_type_name = field_type->getName(); - writeVarUInt(field_type_name.size(), ostr); - writeString(field_type_name, ostr); + encodeDataType(field_type, ostr); field_type->getDefaultSerialization()->serializeBinary(field, ostr, settings); } void SerializationDynamic::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { - UInt8 null_bit; - readBinary(null_bit, istr); - if (null_bit) + auto field_type = decodeDataType(istr); + if (isNothing(field_type)) { field = Null(); return; } - size_t field_type_name_size; - readVarUInt(field_type_name_size, istr); - String field_type_name(field_type_name_size, 0); - istr.readStrict(field_type_name.data(), field_type_name_size); - auto field_type = DataTypeFactory::instance().get(field_type_name); field_type->getDefaultSerialization()->deserializeBinary(field, istr, settings); } @@ -317,15 +323,15 @@ void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_nu const auto & variant_column = dynamic_column.getVariantColumn(); auto global_discr = variant_column.globalDiscriminatorAt(row_num); - UInt8 null_bit = global_discr == ColumnVariant::NULL_DISCRIMINATOR; - writeBinary(null_bit, ostr); - if (null_bit) + /// Serialize NULL as Nothing type with no value. + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + encodeDataType(std::make_shared(), ostr); return; + } const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(global_discr); - const auto & variant_type_name = variant_info.variant_names[global_discr]; - writeVarUInt(variant_type_name.size(), ostr); - writeString(variant_type_name, ostr); + encodeDataType(variant_type, ostr); variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); } @@ -346,30 +352,23 @@ static void deserializeVariant( void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { auto & dynamic_column = assert_cast(column); - UInt8 null_bit; - readBinary(null_bit, istr); - if (null_bit) + auto variant_type = decodeDataType(istr); + if (isNothing(variant_type)) { dynamic_column.insertDefault(); return; } - size_t variant_type_name_size; - readVarUInt(variant_type_name_size, istr); - String variant_type_name(variant_type_name_size, 0); - istr.readStrict(variant_type_name.data(), variant_type_name_size); - + auto variant_type_name = variant_type->getName(); const auto & variant_info = dynamic_column.getVariantInfo(); auto it = variant_info.variant_name_to_discriminator.find(variant_type_name); if (it != variant_info.variant_name_to_discriminator.end()) { - const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(it->second); deserializeVariant(dynamic_column.getVariantColumn(), variant_type, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); return; } /// We don't have this variant yet. Let's try to add it. - auto variant_type = DataTypeFactory::instance().get(variant_type_name); if (dynamic_column.addNewVariant(variant_type)) { auto discr = variant_info.variant_name_to_discriminator.at(variant_type_name); diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index dafd6d663b0..211f0ac9377 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -77,7 +78,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name)) { settings.path.push_back(Substream::DynamicData); - dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); + if (is_null_map_subcolumn) + dynamic_element_state->variant_serialization = std::make_shared(dynamic_element_name, *global_discr); + else + dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } @@ -98,7 +102,16 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( SubstreamsCache * cache) const { if (!state) + { + if (is_null_map_subcolumn) + { + auto mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + } + return; + } auto * dynamic_element_state = checkAndGetState(state); @@ -108,6 +121,12 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } + else if (is_null_map_subcolumn) + { + auto mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + } else { auto mutable_column = result_column->assumeMutable(); diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h index 2ddc3324139..127d14a55e0 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.h +++ b/src/DataTypes/Serializations/SerializationDynamicElement.h @@ -13,11 +13,11 @@ private: /// To be able to deserialize Dynamic element as a subcolumn /// we need its type name and global discriminator. String dynamic_element_name; + bool is_null_map_subcolumn; public: - SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_) - : SerializationWrapper(nested_) - , dynamic_element_name(dynamic_element_name_) + SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false) + : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_) { } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 70fe5182ade..0bef3c7d79d 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -55,13 +55,13 @@ void SerializationMap::deserializeBinary(Field & field, ReadBuffer & istr, const { size_t size; readVarUInt(size, istr); - if (settings.max_binary_array_size && size > settings.max_binary_array_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large map size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_array_size", size, - settings.max_binary_array_size); + settings.binary.max_binary_string_size); field = Map(); Map & map = field.get(); map.reserve(size); diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 9e39ab23709..9e523d0d745 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -33,13 +33,13 @@ namespace ErrorCodes void SerializationString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { const String & s = field.get(); - if (settings.max_binary_string_size && s.size() > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && s.size() > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", s.size(), - settings.max_binary_string_size); + settings.binary.max_binary_string_size); writeVarUInt(s.size(), ostr); writeString(s, ostr); @@ -50,13 +50,13 @@ void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, co { UInt64 size; readVarUInt(size, istr); - if (settings.max_binary_string_size && size > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", size, - settings.max_binary_string_size); + settings.binary.max_binary_string_size); field = String(); String & s = field.get(); @@ -68,13 +68,13 @@ void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, co void SerializationString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const StringRef & s = assert_cast(column).getDataAt(row_num); - if (settings.max_binary_string_size && s.size > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && s.size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", s.size, - settings.max_binary_string_size); + settings.binary.max_binary_string_size); writeVarUInt(s.size, ostr); writeString(s, ostr); @@ -89,13 +89,13 @@ void SerializationString::deserializeBinary(IColumn & column, ReadBuffer & istr, UInt64 size; readVarUInt(size, istr); - if (settings.max_binary_string_size && size > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", size, - settings.max_binary_string_size); + settings.binary.max_binary_string_size); size_t old_chars_size = data.size(); size_t offset = old_chars_size + size + 1; diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index b386fd8ab45..e4d71e84cc7 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -30,12 +31,18 @@ namespace ErrorCodes struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState { - std::vector states; + explicit SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode) + { + } + + SerializationVariant::DiscriminatorsSerializationMode discriminators_mode; + std::vector variant_states; }; struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState { - std::vector states; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; + std::vector variant_states; }; void SerializationVariant::enumerateStreams( @@ -65,13 +72,19 @@ void SerializationVariant::enumerateStreams( for (size_t i = 0; i < variants.size(); ++i) { - settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i); + DataTypePtr type = type_variant ? type_variant->getVariant(i) : nullptr; + settings.path.back().creator = std::make_shared( + local_discriminators, + variant_names[i], + i, + column_variant ? column_variant->localDiscriminatorByGlobal(i) : i, + !type || type->canBeInsideNullable() || type->lowCardinality()); auto variant_data = SubstreamData(variants[i]) - .withType(type_variant ? type_variant->getVariant(i) : nullptr) + .withType(type) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->states[i] : nullptr); + .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->variant_states[i] : nullptr); addVariantElementToPath(settings.path, i); settings.path.back().data = variant_data; @@ -79,6 +92,24 @@ void SerializationVariant::enumerateStreams( settings.path.pop_back(); } + /// Variant subcolumns like variant.Type have type Nullable(Type), so we want to support reading null map subcolumn from it: variant.Type.null. + /// Nullable column is created during deserialization of a variant subcolumn according to the discriminators, so we don't have actual Nullable + /// serialization with null map subcolumn. To be able to read null map subcolumn from the variant subcolumn we use special serialization + /// SerializationVariantElementNullMap. + auto null_map_data = SubstreamData(std::make_shared>()) + .withType(type_variant ? std::make_shared() : nullptr) + .withColumn(column_variant ? ColumnUInt8::create() : nullptr); + + for (size_t i = 0; i < variants.size(); ++i) + { + settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i); + settings.path.push_back(Substream::VariantElementNullMap); + settings.path.back().variant_element_name = variant_names[i]; + settings.path.back().data = null_map_data; + callback(settings.path); + settings.path.pop_back(); + } + settings.path.pop_back(); } @@ -87,17 +118,26 @@ void SerializationVariant::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { - const ColumnVariant & col = assert_cast(column); + settings.path.push_back(Substream::VariantDiscriminators); + auto * discriminators_stream = settings.getter(settings.path); + settings.path.pop_back(); - auto variant_state = std::make_shared(); - variant_state->states.resize(variants.size()); + if (!discriminators_stream) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkStatePrefix"); + + UInt64 mode = settings.use_compact_variant_discriminators_serialization ? DiscriminatorsSerializationMode::COMPACT : DiscriminatorsSerializationMode::BASIC; + writeBinaryLittleEndian(mode, *discriminators_stream); + + const ColumnVariant & col = assert_cast(column); + auto variant_state = std::make_shared(mode); + variant_state->variant_states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]); settings.path.pop_back(); } @@ -116,7 +156,7 @@ void SerializationVariant::serializeBinaryBulkStateSuffix( for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]); settings.path.pop_back(); } settings.path.pop_back(); @@ -128,14 +168,19 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { + DeserializeBinaryBulkStatePtr discriminators_state = deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + auto variant_state = std::make_shared(); - variant_state->states.resize(variants.size()); + variant_state->discriminators_state = discriminators_state; + variant_state->variant_states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache); + variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache); settings.path.pop_back(); } @@ -143,6 +188,29 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( state = std::move(variant_state); } +ISerialization::DeserializeBinaryBulkStatePtr SerializationVariant::deserializeDiscriminatorsStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache) +{ + settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStatePtr discriminators_state = nullptr; + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + discriminators_state = cached_state; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + UInt64 mode; + readBinaryLittleEndian(mode, *discriminators_stream); + discriminators_state = std::make_shared(mode); + addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state); + } + + settings.path.pop_back(); + return discriminators_state; +} + void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( const IColumn & column, @@ -165,13 +233,71 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian auto * variant_state = checkAndGetState(state); - /// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate + /// Don't write anything if column is empty. + if (limit == 0) + return; + + /// Write number of rows in this granule in compact mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + writeVarUInt(UInt64(limit), *discriminators_stream); + + /// If column has only one none empty discriminators and no NULLs we don't need to + /// calculate limits for variants and use provided offset/limit. + if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) + { + auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr); + + /// In compact mode write the format of the granule and single non-empty discriminator. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); + } + /// For basic mode just serialize this discriminator limit times. + else + { + for (size_t i = 0; i < limit; ++i) + writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); + } + + settings.path.push_back(Substream::VariantElements); + addVariantElementToPath(settings.path, non_empty_global_discr); + /// We can use the same offset/limit as for whole Variant column + variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->variant_states[non_empty_global_discr]); + variants_statistics[variant_names[non_empty_global_discr]] += limit; + settings.path.pop_back(); + settings.path.pop_back(); + return; + } + /// If column has only NULLs, just serialize NULL discriminators. + else if (col.hasOnlyNulls()) + { + /// In compact mode write single NULL_DISCRIMINATOR. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + /// In basic mode write NULL_DISCRIMINATOR limit times. + else + { + for (size_t i = 0; i < limit; ++i) + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + return; + } + + /// If offset = 0 and limit == col.size() we don't need to calculate /// offsets and limits for variants and need to just serialize whole columns. - if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls()) + if ((offset == 0 && limit == col.size())) { /// First, serialize discriminators. - /// If we have only NULLs or local and global discriminators are the same, just serialize the column as is. - if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder()) + /// Here we are sure that column contains different discriminators, use plain granule format in compact mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream); + + /// If local and global discriminators are the same, just serialize the column as is. + if (col.hasGlobalVariantsOrder()) { SerializationNumber().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit); } @@ -188,7 +314,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian for (size_t i = 0; i != variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]); variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size(); settings.path.pop_back(); } @@ -196,36 +322,16 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian return; } - /// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant. - if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - /// First, serialize discriminators. - /// We know that all discriminators are the same, so we just need to serialize this discriminator limit times. - auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr); - for (size_t i = 0; i != limit; ++i) - writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); - - /// Second, serialize non-empty variant (other variants are empty and we can skip their serialization). - settings.path.push_back(Substream::VariantElements); - addVariantElementToPath(settings.path, non_empty_global_discr); - /// We can use the same offset/limit as for whole Variant column - variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]); - variants_statistics[variant_names[non_empty_global_discr]] += limit; - settings.path.pop_back(); - settings.path.pop_back(); - return; - } - /// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant. const auto & local_discriminators = col.getLocalDiscriminators(); const auto & offsets = col.getOffsets(); std::vector> variant_offsets_and_limits(variants.size(), {0, 0}); size_t end = offset + limit; + size_t num_non_empty_variants_in_range = 0; + ColumnVariant::Discriminator last_non_empty_variant_discr = 0; for (size_t i = offset; i < end; ++i) { auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]); - writeBinaryLittleEndian(global_discr, *discriminators_stream); - if (global_discr != ColumnVariant::NULL_DISCRIMINATOR) { /// If we see this discriminator for the first time, update offset @@ -233,9 +339,38 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian variant_offsets_and_limits[global_discr].first = offsets[i]; /// Update limit for this discriminator. ++variant_offsets_and_limits[global_discr].second; + ++num_non_empty_variants_in_range; + last_non_empty_variant_discr = global_discr; } } + /// In basic mode just serialize discriminators as is row by row. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC) + { + for (size_t i = offset; i < end; ++i) + writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream); + } + /// In compact mode check if we have the same discriminator for all rows in this granule. + /// First, check if all values in granule are NULLs. + else if (num_non_empty_variants_in_range == 0) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + /// Then, check if there is only 1 variant and no NULLs in this granule. + else if (num_non_empty_variants_in_range == 1 && variant_offsets_and_limits[last_non_empty_variant_discr].second == limit) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(last_non_empty_variant_discr, *discriminators_stream); + } + /// Otherwise there are different discriminators in this granule. + else + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream); + for (size_t i = offset; i < end; ++i) + writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream); + } + /// Serialize variants in global order. settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variants.size(); ++i) @@ -249,7 +384,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian variant_offsets_and_limits[i].first, variant_offsets_and_limits[i].second, settings, - variant_state->states[i]); + variant_state->variant_states[i]); variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second; settings.path.pop_back(); } @@ -284,39 +419,68 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( /// First, deserialize discriminators. settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariant * variant_state = nullptr; + std::vector variant_limits; if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) { + variant_state = checkAndGetState(state); col.getLocalDiscriminatorsPtr() = cached_discriminators; } - else + else if (auto * discriminators_stream = settings.getter(settings.path)) { - auto * discriminators_stream = settings.getter(settings.path); - if (!discriminators_stream) - return; + variant_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState(variant_state->discriminators_state); + + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, *discriminators_state); - SerializationNumber().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr()); } + /// It may happen that there is no such stream, in this case just do nothing. + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - /// Second, calculate limits for each variant by iterating through new discriminators. - std::vector variant_limits(variants.size(), 0); - auto & discriminators_data = col.getLocalDiscriminators(); - size_t discriminators_offset = discriminators_data.size() - limit; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + /// Second, calculate limits for each variant by iterating through new discriminators + /// if we didn't do it during discriminators deserialization. + if (variant_limits.empty()) { - ColumnVariant::Discriminator discr = discriminators_data[i]; - if (discr != ColumnVariant::NULL_DISCRIMINATOR) - ++variant_limits[discr]; + variant_limits.resize(variants.size(), 0); + auto & discriminators_data = col.getLocalDiscriminators(); + + /// We can actually read less than limit discriminators and we cannot determine the actual number of read rows + /// by discriminators column as it could be taken from the substreams cache. And we need actual number of read + /// rows to fill offsets correctly later if they are not in the cache. We can determine if offsets column is in cache + /// or not by comparing it with discriminators column size (they should be the same when offsets are in cache). + /// If offsets are not in the cache, we can use it's size to determine the actual number of read rows. + size_t num_new_discriminators = limit; + size_t offsets_size = col.getOffsetsPtr()->size(); + if (discriminators_data.size() > offsets_size) + num_new_discriminators = discriminators_data.size() - offsets_size; + size_t discriminators_offset = discriminators_data.size() - num_new_discriminators; + + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + ++variant_limits[discr]; + } } /// Now we can deserialize variants according to their limits. - auto * variant_state = checkAndGetState(state); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache); + variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->variant_states[i], cache); settings.path.pop_back(); } settings.path.pop_back(); @@ -336,20 +500,49 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( } else { - auto & offsets = col.getOffsets(); - offsets.reserve(offsets.size() + limit); std::vector variant_offsets; variant_offsets.reserve(variants.size()); + size_t num_non_empty_variants = 0; + ColumnVariant::Discriminator last_non_empty_discr = 0; for (size_t i = 0; i != variants.size(); ++i) - variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); - - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) { - ColumnVariant::Discriminator discr = discriminators_data[i]; - if (discr == ColumnVariant::NULL_DISCRIMINATOR) - offsets.emplace_back(); - else - offsets.push_back(variant_offsets[discr]++); + if (variant_limits[i]) + { + ++num_non_empty_variants; + last_non_empty_discr = i; + } + + variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); + } + + auto & discriminators_data = col.getLocalDiscriminators(); + auto & offsets = col.getOffsets(); + size_t num_new_offsets = discriminators_data.size() - offsets.size(); + offsets.reserve(offsets.size() + num_new_offsets); + /// If there are only NULLs were read, fill offsets with 0. + if (num_non_empty_variants == 0) + { + offsets.resize_fill(discriminators_data.size(), 0); + } + /// If there is only 1 variant and no NULLs was read, fill offsets with sequential offsets of this variant. + else if (num_non_empty_variants == 1 && variant_limits[last_non_empty_discr] == num_new_offsets) + { + size_t first_offset = col.getVariantByLocalDiscriminator(last_non_empty_discr).size() - num_new_offsets; + for (size_t i = 0; i != num_new_offsets; ++i) + offsets.push_back(first_offset + i); + } + /// Otherwise iterate through discriminators and fill offsets accordingly. + else + { + size_t start = offsets.size(); + for (size_t i = start; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr == ColumnVariant::NULL_DISCRIMINATOR) + offsets.emplace_back(); + else + offsets.push_back(variant_offsets[discr]++); + } } addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr()); @@ -357,6 +550,72 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( settings.path.pop_back(); } +std::vector SerializationVariant::deserializeCompactDiscriminators( + DB::ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStateVariantDiscriminators & state) const +{ + auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); + auto & discriminators_data = discriminators.getData(); + + /// Reset state if we are reading from the start of the granule and not from the previous position in the file. + if (!continuous_reading) + state.remaining_rows_in_granule = 0; + + /// Calculate limits for variants during discriminators deserialization. + std::vector variant_limits(variants.size(), 0); + while (limit) + { + /// If we read all rows from current granule, start reading the next one. + if (state.remaining_rows_in_granule == 0) + { + if (stream->eof()) + return variant_limits; + + readDiscriminatorsGranuleStart(state, stream); + } + + size_t limit_in_granule = std::min(limit, state.remaining_rows_in_granule); + if (state.granule_format == CompactDiscriminatorsGranuleFormat::COMPACT) + { + auto & data = discriminators.getData(); + data.resize_fill(data.size() + limit_in_granule, state.compact_discr); + if (state.compact_discr != ColumnVariant::NULL_DISCRIMINATOR) + variant_limits[state.compact_discr] += limit_in_granule; + } + else + { + SerializationNumber().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0); + size_t start = discriminators_data.size() - limit_in_granule; + for (size_t i = start; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + ++variant_limits[discr]; + } + } + + state.remaining_rows_in_granule -= limit_in_granule; + limit -= limit_in_granule; + } + + return variant_limits; +} + +void SerializationVariant::readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, DB::ReadBuffer * stream) +{ + UInt64 granule_size; + readVarUInt(granule_size, *stream); + state.remaining_rows_in_granule = granule_size; + UInt8 granule_format; + readBinaryLittleEndian(granule_format, *stream); + state.granule_format = static_cast(granule_format); + if (granule_format == CompactDiscriminatorsGranuleFormat::COMPACT) + readBinaryLittleEndian(state.compact_discr, *stream); +} + void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const { path.push_back(Substream::VariantElement); diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index b6aa1534538..af89632cf81 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -2,10 +2,18 @@ #include #include +#include namespace DB { + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + + /// Class for serializing/deserializing column with Variant type. /// It supports both text and binary bulk serializations/deserializations. /// @@ -18,6 +26,17 @@ namespace DB /// /// During binary bulk serialization it transforms local discriminators /// to global and serializes them into a separate stream VariantDiscriminators. +/// There are 2 modes of serialising discriminators: +/// Basic mode, when all discriminators are serialized as is row by row. +/// Compact mode, when we avoid writing the same discriminators in granules when there is +/// only one variant (or only NULLs) in the granule. +/// In compact mode we serialize granules in the following format: +/// +/// There are 2 different formats of granule - plain and compact. +/// Plain format is used when there are different discriminators in this granule, +/// in this format all discriminators are serialized as is row by row. +/// Compact format is used when all discriminators are the same in this granule, +/// in this case only this single discriminator is serialized. /// Each variant is serialized into a separate stream with path VariantElements/VariantElement /// (VariantElements stream is needed for correct sub-columns creation). We store and serialize /// variants in a sparse form (the size of a variant column equals to the number of its discriminator @@ -32,6 +51,25 @@ namespace DB class SerializationVariant : public ISerialization { public: + struct DiscriminatorsSerializationMode + { + enum Value + { + BASIC = 0, /// Store the whole discriminators column. + COMPACT = 1, /// Don't write discriminators in granule if all of them are the same. + }; + + static void checkMode(UInt64 mode) + { + if (mode > Value::COMPACT) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for SerializationVariant discriminators column."); + } + + explicit DiscriminatorsSerializationMode(UInt64 mode) : value(static_cast(mode)) { checkMode(mode); } + + Value value; + }; + using VariantSerializations = std::vector; explicit SerializationVariant( @@ -123,8 +161,44 @@ public: static std::vector getVariantsDeserializeTextOrder(const DataTypes & variant_types); private: + friend SerializationVariantElement; + friend SerializationVariantElementNullMap; + void addVariantElementToPath(SubstreamPath & path, size_t i) const; + enum CompactDiscriminatorsGranuleFormat + { + PLAIN = 0, /// Granule has different discriminators and they are serialized as is row by row. + COMPACT = 1, /// Granule has single discriminator for all rows and it is serialized as single value. + }; + + struct DeserializeBinaryBulkStateVariantDiscriminators : public ISerialization::DeserializeBinaryBulkState + { + explicit DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_) + { + } + + DiscriminatorsSerializationMode mode; + + /// Deserialize state of currently read granule in compact mode. + CompactDiscriminatorsGranuleFormat granule_format = CompactDiscriminatorsGranuleFormat::PLAIN; + size_t remaining_rows_in_granule = 0; + ColumnVariant::Discriminator compact_discr = 0; + }; + + static DeserializeBinaryBulkStatePtr deserializeDiscriminatorsStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache); + + std::vector deserializeCompactDiscriminators( + ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStateVariantDiscriminators & state) const; + + static void readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, ReadBuffer * stream); + bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index ec0b4019c2f..8ceab17cba4 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -12,7 +13,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState +struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState { /// During deserialization discriminators and variant streams can be shared. /// For example we can read several variant elements together: "select v.UInt32, v.String from table", @@ -24,7 +25,7 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria /// substream cache correctly. ColumnPtr discriminators; ColumnPtr variant; - + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; @@ -65,7 +66,12 @@ void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinary void SerializationVariantElement::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { + DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + auto variant_element_state = std::make_shared(); + variant_element_state->discriminators_state = discriminators_state; addVariantToPath(settings.path); nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache); @@ -86,35 +92,61 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const { - auto * variant_element_state = checkAndGetState(state); - /// First, deserialize discriminators from Variant column. settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariantElement * variant_element_state = nullptr; + std::optional variant_limit; if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) { + variant_element_state = checkAndGetState(state); variant_element_state->discriminators = cached_discriminators; } - else + else if (auto * discriminators_stream = settings.getter(settings.path)) { - auto * discriminators_stream = settings.getter(settings.path); - if (!discriminators_stream) - return; + variant_element_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState(variant_element_state->discriminators_state); /// If we started to read a new column, reinitialize discriminators column in deserialization state. if (!variant_element_state->discriminators || result_column->empty()) variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); - SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limit = deserializeCompactDiscriminators( + variant_element_state->discriminators, + variant_discriminator, + limit, + discriminators_stream, + settings.continuous_reading, + variant_element_state->discriminators_state, + this); + addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators); } + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - /// Iterate through new discriminators to calculate the limit for our variant. + /// We could read less than limit discriminators, but we will need actual number of read rows later. + size_t num_new_discriminators = variant_element_state->discriminators->size() - result_column->size(); + + /// Iterate through new discriminators to calculate the limit for our variant + /// if we didn't do it during discriminators deserialization. const auto & discriminators_data = assert_cast(*variant_element_state->discriminators).getData(); - size_t discriminators_offset = variant_element_state->discriminators->size() - limit; - size_t variant_limit = 0; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) - variant_limit += (discriminators_data[i] == variant_discriminator); + size_t discriminators_offset = variant_element_state->discriminators->size() - num_new_discriminators; + if (!variant_limit) + { + variant_limit = 0; + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + *variant_limit += (discriminators_data[i] == variant_discriminator); + } /// Now we know the limit for our variant and can deserialize it. @@ -125,19 +157,19 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( auto & nullable_column = assert_cast(*mutable_column); NullMap & null_map = nullable_column.getNullMapData(); /// If we have only our discriminator in range, fill null map with 0. - if (variant_limit == limit) + if (variant_limit == num_new_discriminators) { - null_map.resize_fill(null_map.size() + limit, 0); + null_map.resize_fill(null_map.size() + num_new_discriminators, 0); } /// If no our discriminator in current range, fill null map with 1. else if (variant_limit == 0) { - null_map.resize_fill(null_map.size() + limit, 1); + null_map.resize_fill(null_map.size() + num_new_discriminators, 1); } /// Otherwise we should iterate through discriminators to fill null map. else { - null_map.reserve(null_map.size() + limit); + null_map.reserve(null_map.size() + num_new_discriminators); for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) null_map.push_back(discriminators_data[i] != variant_discriminator); } @@ -159,12 +191,12 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( /// If nothing to deserialize, just insert defaults. if (variant_limit == 0) { - mutable_column->insertManyDefaults(limit); + mutable_column->insertManyDefaults(num_new_discriminators); return; } addVariantToPath(settings.path); - nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache); + nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); /// If nothing was deserialized when variant_limit > 0 @@ -173,16 +205,16 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( /// In this case we should just insert default values. if (variant_element_state->variant->empty()) { - mutable_column->insertManyDefaults(limit); + mutable_column->insertManyDefaults(num_new_discriminators); return; } - size_t variant_offset = variant_element_state->variant->size() - variant_limit; + size_t variant_offset = variant_element_state->variant->size() - *variant_limit; /// If we have only our discriminator in range, insert the whole range to result column. - if (variant_limit == limit) + if (variant_limit == num_new_discriminators) { - mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit); + mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, *variant_limit); } /// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator. else @@ -197,6 +229,59 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( } } +size_t SerializationVariantElement::deserializeCompactDiscriminators( + DB::ColumnPtr & discriminators_column, + ColumnVariant::Discriminator variant_discriminator, + size_t limit, + DB::ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStatePtr & discriminators_state_, + const ISerialization * serialization) +{ + auto * discriminators_state = checkAndGetState(discriminators_state_, serialization); + auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); + auto & discriminators_data = discriminators.getData(); + + /// Reset state if we are reading from the start of the granule and not from the previous position in the file. + if (!continuous_reading) + discriminators_state->remaining_rows_in_granule = 0; + + /// Calculate our variant limit during discriminators deserialization. + size_t variant_limit = 0; + while (limit) + { + /// If we read all rows from current granule, start reading the next one. + if (discriminators_state->remaining_rows_in_granule == 0) + { + if (stream->eof()) + return variant_limit; + + SerializationVariant::readDiscriminatorsGranuleStart(*discriminators_state, stream); + } + + size_t limit_in_granule = std::min(limit, discriminators_state->remaining_rows_in_granule); + if (discriminators_state->granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT) + { + auto & data = discriminators.getData(); + data.resize_fill(data.size() + limit_in_granule, discriminators_state->compact_discr); + if (discriminators_state->compact_discr == variant_discriminator) + variant_limit += limit_in_granule; + } + else + { + SerializationNumber().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0); + size_t start = discriminators_data.size() - limit_in_granule; + for (size_t i = start; i != discriminators_data.size(); ++i) + variant_limit += (discriminators_data[i] == variant_discriminator); + } + + discriminators_state->remaining_rows_in_granule -= limit_in_granule; + limit -= limit_in_granule; + } + + return variant_limit; +} + void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const { path.push_back(Substream::VariantElements); @@ -214,17 +299,19 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, - ColumnVariant::Discriminator local_variant_discriminator_) + ColumnVariant::Discriminator local_variant_discriminator_, + bool make_nullable_) : local_discriminators(local_discriminators_) , variant_element_name(variant_element_name_) , global_variant_discriminator(global_variant_discriminator_) , local_variant_discriminator(local_variant_discriminator_) + , make_nullable(make_nullable_) { } DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const { - return makeNullableOrLowCardinalityNullableSafe(prev); + return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; } SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const @@ -237,12 +324,12 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Case when original Variant column contained only one non-empty variant and no NULLs. /// In this case just use this variant. if (prev->size() == local_discriminators->size()) - return makeNullableOrLowCardinalityNullableSafe(prev); + return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; /// If this variant is empty, fill result column with default values. if (prev->empty()) { - auto res = makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty(); + auto res = make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty() : prev->cloneEmpty(); res->insertManyDefaults(local_discriminators->size()); return res; } @@ -257,16 +344,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Now we can create new column from null-map and variant column using IColumn::expand. auto res_column = IColumn::mutate(prev); - /// Special case for LowCardinality. We want the result to be LowCardinality(Nullable), + /// Special case for LowCardinality when we want the result to be LowCardinality(Nullable), /// but we don't have a good way to apply null-mask for LowCardinality(), so, we first /// convert our column to LowCardinality(Nullable()) and then use expand which will /// fill rows with 0 in mask with default value (that is NULL). - if (prev->lowCardinality()) + if (make_nullable && prev->lowCardinality()) res_column = assert_cast(*res_column).cloneNullable(); res_column->expand(null_map, /*inverted = */ true); - if (res_column->canBeInsideNullable()) + if (make_nullable && prev->canBeInsideNullable()) { auto null_map_col = ColumnUInt8::create(); null_map_col->getData() = std::move(null_map); diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index 0ce0a72e250..69101aea0f5 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -9,6 +9,7 @@ namespace DB { class SerializationVariant; +class SerializationVariantElementNullMap; /// Serialization for Variant element when we read it as a subcolumn. class SerializationVariantElement final : public SerializationWrapper @@ -66,12 +67,14 @@ public: const String variant_element_name; const ColumnVariant::Discriminator global_variant_discriminator; const ColumnVariant::Discriminator local_variant_discriminator; + bool make_nullable; VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, - ColumnVariant::Discriminator local_variant_discriminator_); + ColumnVariant::Discriminator local_variant_discriminator_, + bool make_nullable_); DataTypePtr create(const DataTypePtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; @@ -79,6 +82,18 @@ public: }; private: friend SerializationVariant; + friend SerializationVariantElementNullMap; + + struct DeserializeBinaryBulkStateVariantElement; + + static size_t deserializeCompactDiscriminators( + ColumnPtr & discriminators_column, + ColumnVariant::Discriminator variant_discriminator, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStatePtr & discriminators_state_, + const ISerialization * serialization); void addVariantToPath(SubstreamPath & path) const; void removeVariantFromPath(SubstreamPath & path) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp new file mode 100644 index 00000000000..f30da4fecf9 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp @@ -0,0 +1,190 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +struct DeserializeBinaryBulkStateVariantElementNullMap : public ISerialization::DeserializeBinaryBulkState +{ + /// During deserialization discriminators streams can be shared. + /// For example we can read several variant elements together: "select v.UInt32, v.String.null from table", + /// or we can read the whole variant and some of variant elements or their subcolumns: "select v, v.UInt32.null from table". + /// To read the same column from the same stream more than once we use substream cache, + /// but this cache stores the whole column, not only the current range. + /// During deserialization of variant elements or their subcolumns discriminators column is not stored + /// in the result column, so we need to store them inside deserialization state, so we can use + /// substream cache correctly. + ColumnPtr discriminators; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; +}; + +void SerializationVariantElementNullMap::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData &) const +{ + /// We will need stream for discriminators during deserialization. + settings.path.push_back(Substream::VariantDiscriminators); + callback(settings.path); + settings.path.pop_back(); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkStatePrefix( + const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const +{ + DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + + auto variant_element_null_map_state = std::make_shared(); + variant_element_null_map_state->discriminators_state = std::move(discriminators_state); + state = std::move(variant_element_null_map_state); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkWithMultipleStreams( + const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & result_column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + /// Deserialize discriminators from Variant column. + settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariantElementNullMap * variant_element_null_map_state = nullptr; + std::optional variant_limit; + if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) + { + variant_element_null_map_state = checkAndGetState(state); + variant_element_null_map_state->discriminators = cached_discriminators; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + variant_element_null_map_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState( + variant_element_null_map_state->discriminators_state); + + /// If we started to read a new column, reinitialize discriminators column in deserialization state. + if (!variant_element_null_map_state->discriminators || result_column->empty()) + variant_element_null_map_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); + + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk( + *variant_element_null_map_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limit = SerializationVariantElement::deserializeCompactDiscriminators( + variant_element_null_map_state->discriminators, + variant_discriminator, + limit, + discriminators_stream, + settings.continuous_reading, + variant_element_null_map_state->discriminators_state, + this); + + addToSubstreamsCache(cache, settings.path, variant_element_null_map_state->discriminators); + } + else + { + /// There is no such stream or cached data, it means that there is no Variant column in this part (it could happen after alter table add column). + /// In such cases columns are filled with default values, but for null-map column default value should be 1, not 0. Fill column with 1 here instead. + MutableColumnPtr mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + settings.path.pop_back(); + return; + } + settings.path.pop_back(); + + MutableColumnPtr mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + /// Check if there are no such variant in read range. + if (variant_limit && *variant_limit == 0) + { + data.resize_fill(data.size() + limit, 1); + } + /// Check if there is only our variant in read range. + else if (variant_limit && *variant_limit == limit) + { + data.resize_fill(data.size() + limit, 0); + } + /// Iterate through new discriminators to calculate the null map of our variant. + else + { + const auto & discriminators_data + = assert_cast(*variant_element_null_map_state->discriminators).getData(); + size_t discriminators_offset = variant_element_null_map_state->discriminators->size() - limit; + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + data.push_back(discriminators_data[i] != variant_discriminator); + } +} + +SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::VariantNullMapSubcolumnCreator( + const ColumnPtr & local_discriminators_, + const String & variant_element_name_, + ColumnVariant::Discriminator global_variant_discriminator_, + ColumnVariant::Discriminator local_variant_discriminator_) + : local_discriminators(local_discriminators_) + , variant_element_name(variant_element_name_) + , global_variant_discriminator(global_variant_discriminator_) + , local_variant_discriminator(local_variant_discriminator_) +{ +} + +DataTypePtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::DataTypePtr &) const +{ + return std::make_shared(); +} + +SerializationPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::SerializationPtr &) const +{ + return std::make_shared(variant_element_name, global_variant_discriminator); +} + +ColumnPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::ColumnPtr &) const +{ + /// Iterate through discriminators and create null-map for our variant. + auto null_map_col = ColumnUInt8::create(); + auto & null_map_data = null_map_col->getData(); + null_map_data.reserve(local_discriminators->size()); + const auto & local_discriminators_data = assert_cast(*local_discriminators).getData(); + for (auto local_discr : local_discriminators_data) + null_map_data.push_back(local_discr != local_variant_discriminator); + + return null_map_col; +} + + +} diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.h b/src/DataTypes/Serializations/SerializationVariantElementNullMap.h new file mode 100644 index 00000000000..cd81b445189 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.h @@ -0,0 +1,107 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class SerializationVariant; +class SerializationVariantElement; + +/// Serialization for Variant element null map when we read it as a subcolumn. +/// For example, variant.UInt64.null. +/// It requires separate serialization because there is no actual Nullable column +/// and we should construct null map from variant discriminators. +/// The implementation of deserializeBinaryBulk* methods is similar to SerializationVariantElement, +/// but differs in that there is no need to read the actual data of the variant, only discriminators. +class SerializationVariantElementNullMap final : public SimpleTextSerialization +{ +public: + SerializationVariantElementNullMap(const String & variant_element_name_, ColumnVariant::Discriminator variant_discriminator_) + : variant_element_name(variant_element_name_), variant_discriminator(variant_discriminator_) + { + } + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } + bool tryDeserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } + + struct VariantNullMapSubcolumnCreator : public ISubcolumnCreator + { + const ColumnPtr local_discriminators; + const String variant_element_name; + const ColumnVariant::Discriminator global_variant_discriminator; + const ColumnVariant::Discriminator local_variant_discriminator; + + VariantNullMapSubcolumnCreator( + const ColumnPtr & local_discriminators_, + const String & variant_element_name_, + ColumnVariant::Discriminator global_variant_discriminator_, + ColumnVariant::Discriminator local_variant_discriminator_); + + DataTypePtr create(const DataTypePtr & prev) const override; + ColumnPtr create(const ColumnPtr & prev) const override; + SerializationPtr create(const SerializationPtr & prev) const override; + }; +private: + [[noreturn]] static void throwNoSerialization() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Text/binary serialization is not implemented for variant element null map subcolumn"); + } + + friend SerializationVariant; + friend SerializationVariantElement; + + /// To be able to deserialize Variant element null map as a subcolumn + /// we need variant element type name and global discriminator. + String variant_element_name; + ColumnVariant::Discriminator variant_discriminator; + +}; + +} diff --git a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp index 0ae325871fb..033a6ea8a4a 100644 --- a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp +++ b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp @@ -72,8 +72,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) DataTypePtr type = DataTypeFactory::instance().get(data_type); FormatSettings settings; - settings.max_binary_string_size = 100; - settings.max_binary_array_size = 100; + settings.binary.max_binary_string_size = 100; + settings.binary.max_binary_string_size = 100; Field field; type->getDefaultSerialization()->deserializeBinary(field, in, settings); diff --git a/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp new file mode 100644 index 00000000000..4d0bfc67183 --- /dev/null +++ b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp @@ -0,0 +1,129 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace DB; + +namespace DB::ErrorCodes +{ +extern const int UNSUPPORTED_METHOD; +} + + +void check(const DataTypePtr & type) +{ +// std::cerr << "Check " << type->getName() << "\n"; + WriteBufferFromOwnString ostr; + encodeDataType(type, ostr); + ReadBufferFromString istr(ostr.str()); + DataTypePtr decoded_type = decodeDataType(istr); + ASSERT_TRUE(istr.eof()); + ASSERT_EQ(type->getName(), decoded_type->getName()); + ASSERT_TRUE(type->equals(*decoded_type)); +} + +GTEST_TEST(DataTypesBinaryEncoding, EncodeAndDecode) +{ + tryRegisterAggregateFunctions(); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared("EST")); + check(std::make_shared("CET")); + check(std::make_shared(3)); + check(std::make_shared(3, "EST")); + check(std::make_shared(3, "CET")); + check(std::make_shared()); + check(std::make_shared(10)); + check(DataTypeFactory::instance().get("Enum8('a' = 1, 'b' = 2, 'c' = 3, 'd' = -128)")); + check(DataTypeFactory::instance().get("Enum16('a' = 1, 'b' = 2, 'c' = 3, 'd' = -1000)")); + check(std::make_shared(3, 6)); + check(std::make_shared(3, 6)); + check(std::make_shared(3, 6)); + check(std::make_shared(3, 6)); + check(std::make_shared()); + check(DataTypeFactory::instance().get("Array(UInt32)")); + check(DataTypeFactory::instance().get("Array(Array(Array(UInt32)))")); + check(DataTypeFactory::instance().get("Tuple(UInt32, String, UUID)")); + check(DataTypeFactory::instance().get("Tuple(UInt32, String, Tuple(UUID, Date, IPv4))")); + check(DataTypeFactory::instance().get("Tuple(c1 UInt32, c2 String, c3 UUID)")); + check(DataTypeFactory::instance().get("Tuple(c1 UInt32, c2 String, c3 Tuple(c4 UUID, c5 Date, c6 IPv4))")); + check(std::make_shared()); + check(std::make_shared(IntervalKind::Kind::Nanosecond)); + check(std::make_shared(IntervalKind::Kind::Microsecond)); + check(DataTypeFactory::instance().get("Nullable(UInt32)")); + check(DataTypeFactory::instance().get("Nullable(Nothing)")); + check(DataTypeFactory::instance().get("Nullable(UUID)")); + check(std::make_shared( + DataTypes{ + std::make_shared(), + std::make_shared(), + DataTypeFactory::instance().get("Array(Array(Array(UInt32)))")}, + DataTypeFactory::instance().get("Tuple(c1 UInt32, c2 String, c3 UUID)"))); + DataTypes argument_types = {std::make_shared()}; + Array parameters = {Field(0.1), Field(0.2)}; + AggregateFunctionProperties properties; + AggregateFunctionPtr function = AggregateFunctionFactory::instance().get("quantiles", NullsAction::EMPTY, argument_types, parameters, properties); + check(std::make_shared(function, argument_types, parameters)); + check(std::make_shared(function, argument_types, parameters, 2)); + check(DataTypeFactory::instance().get("AggregateFunction(sum, UInt64)")); + check(DataTypeFactory::instance().get("AggregateFunction(quantiles(0.5, 0.9), UInt64)")); + check(DataTypeFactory::instance().get("AggregateFunction(sequenceMatch('(?1)(?2)'), Date, UInt8, UInt8)")); + check(DataTypeFactory::instance().get("AggregateFunction(sumMapFiltered([1, 4, 8]), Array(UInt64), Array(UInt64))")); + check(DataTypeFactory::instance().get("LowCardinality(UInt32)")); + check(DataTypeFactory::instance().get("LowCardinality(Nullable(String))")); + check(DataTypeFactory::instance().get("Map(String, UInt32)")); + check(DataTypeFactory::instance().get("Map(String, Map(String, Map(String, UInt32)))")); + check(std::make_shared()); + check(std::make_shared()); + check(DataTypeFactory::instance().get("Variant(String, UInt32, Date32)")); + check(std::make_shared()); + check(std::make_shared(10)); + check(std::make_shared(255)); + check(DataTypeFactory::instance().get("Bool")); + check(DataTypeFactory::instance().get("SimpleAggregateFunction(sum, UInt64)")); + check(DataTypeFactory::instance().get("SimpleAggregateFunction(maxMap, Tuple(Array(UInt32), Array(UInt32)))")); + check(DataTypeFactory::instance().get("SimpleAggregateFunction(groupArrayArray(19), Array(UInt64))")); + check(DataTypeFactory::instance().get("Nested(a UInt32, b UInt32)")); + check(DataTypeFactory::instance().get("Nested(a UInt32, b Nested(c String, d Nested(e Date)))")); + check(DataTypeFactory::instance().get("Ring")); + check(DataTypeFactory::instance().get("Point")); + check(DataTypeFactory::instance().get("Polygon")); + check(DataTypeFactory::instance().get("MultiPolygon")); + check(DataTypeFactory::instance().get("Tuple(Map(LowCardinality(String), Array(AggregateFunction(2, quantiles(0.1, 0.2), Float32))), Array(Array(Tuple(UInt32, Tuple(a Map(String, String), b Nullable(Date), c Variant(Tuple(g String, d Array(UInt32)), Date, Map(String, String)))))))")); +} diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index 40234abb20f..67bce915168 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -211,6 +212,13 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction qualified_name.database = table_identifier->getDatabaseName(); qualified_name.table = table_identifier->shortName(); } + else if (arg->as()) + { + /// Allow IN subquery. + /// Do not add tables from the subquery into dependencies, + /// because CREATE will succeed anyway. + return; + } else { assert(false); diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index ccab72cfbae..f58b90f1134 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -12,7 +13,7 @@ #include #include #include -#include "Common/logger_useful.h" +#include #include #include #include @@ -106,12 +107,24 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name) { + // it is important to call the destructors of not_in_use without + // locked mutex to avoid potential deadlock. DetachedTables not_in_use; - std::lock_guard lock(mutex); - auto table = DatabaseOrdinary::detachTableUnlocked(name); - table_name_to_path.erase(name); - detached_tables.emplace(table->getStorageID().uuid, table); - not_in_use = cleanupDetachedTables(); + StoragePtr table; + { + std::lock_guard lock(mutex); + table = DatabaseOrdinary::detachTableUnlocked(name); + table_name_to_path.erase(name); + detached_tables.emplace(table->getStorageID().uuid, table); + not_in_use = cleanupDetachedTables(); + } + + if (!not_in_use.empty()) + { + not_in_use.clear(); + LOG_DEBUG(log, "Finished removing not used detached tables"); + } + return table; } @@ -397,7 +410,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables() LOG_DEBUG(log, "There are {} detached tables. Start searching non used tables.", detached_tables.size()); while (it != detached_tables.end()) { - if (it->second.unique()) + if (isSharedPtrUnique(it->second)) { not_in_use.emplace(it->first, it->second); it = detached_tables.erase(it); diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 233db07cd68..da942cebf8f 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -305,7 +306,7 @@ try String table_name = expired_tables.front().table_name; auto it = tables_cache.find(table_name); - if (!it->second.table || it->second.table.unique()) + if (!it->second.table || isSharedPtrUnique(it->second.table)) { LOG_DEBUG(log, "Drop table {} from cache.", backQuote(it->first)); it->second.table.reset(); diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 1c82131af0d..bb24373a7e1 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -2,6 +2,7 @@ #if USE_MYSQL # include +# include # include # include # include @@ -354,7 +355,7 @@ void DatabaseMySQL::cleanOutdatedTables() { for (auto iterator = outdated_tables.begin(); iterator != outdated_tables.end();) { - if (!iterator->unique()) + if (!isSharedPtrUnique(*iterator)) ++iterator; else { diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 7ab4235feeb..2c342755337 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -532,13 +533,17 @@ static inline void dumpDataForTables( bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metadata) { bool opened_transaction = false; - mysqlxx::PoolWithFailover::Entry connection; while (!isCancelled()) { try { - connection = pool.tryGet(); + mysqlxx::PoolWithFailover::Entry connection = pool.tryGet(); + SCOPE_EXIT({ + if (opened_transaction) + connection->query("ROLLBACK").execute(); + }); + if (connection.isNull()) { if (settings->max_wait_time_when_mysql_unavailable < 0) @@ -602,9 +607,6 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta { tryLogCurrentException(log); - if (opened_transaction) - connection->query("ROLLBACK").execute(); - if (settings->max_wait_time_when_mysql_unavailable < 0) throw; @@ -716,6 +718,16 @@ static void writeFieldsToColumn( null_map_column->insertValue(0); } + else + { + // Column is not null but field is null. It's possible due to overrides + if (field.isNull()) + { + column_to.insertDefault(); + return false; + } + } + return true; }; @@ -791,7 +803,7 @@ static void writeFieldsToColumn( if (write_data_to_null_map(value, index)) { - const String & data = value.get(); + const String & data = value.safeGet(); casted_string_column->insertData(data.data(), data.size()); } } diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index 7ce03c74c58..e38d09e99b2 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -529,7 +529,12 @@ void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory) } auto connection_info = postgres::formatConnectionString( - configuration.database, configuration.host, configuration.port, configuration.username, configuration.password); + configuration.database, + configuration.host, + configuration.port, + configuration.username, + configuration.password, + args.context->getSettingsRef().postgresql_connection_attempt_timeout); auto postgresql_replica_settings = std::make_unique(); if (engine_define->settings) diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 136fb7fd6d2..b22356bebea 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -545,8 +545,9 @@ void registerDatabasePostgreSQL(DatabaseFactory & factory) configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); return std::make_shared( args.context, diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index c7401386e40..b35e14577a8 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -205,8 +205,9 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) configuration.replicas_configurations, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); PostgreSQLDictionarySource::Configuration dictionary_configuration { diff --git a/src/Disks/DiskFactory.cpp b/src/Disks/DiskFactory.cpp index de7ee5a74f4..4aa7f6ff564 100644 --- a/src/Disks/DiskFactory.cpp +++ b/src/Disks/DiskFactory.cpp @@ -27,7 +27,8 @@ DiskPtr DiskFactory::create( ContextPtr context, const DisksMap & map, bool attach, - bool custom_disk) const + bool custom_disk, + const std::unordered_set & skip_types) const { const auto disk_type = config.getString(config_prefix + ".type", "local"); @@ -38,6 +39,11 @@ DiskPtr DiskFactory::create( "DiskFactory: the disk '{}' has unknown disk type: {}", name, disk_type); } + if (skip_types.contains(found->first)) + { + return nullptr; + } + const auto & disk_creator = found->second; return disk_creator(name, config, config_prefix, context, map, attach, custom_disk); } diff --git a/src/Disks/DiskFactory.h b/src/Disks/DiskFactory.h index d03ffa6a40f..044ce81dbae 100644 --- a/src/Disks/DiskFactory.h +++ b/src/Disks/DiskFactory.h @@ -42,7 +42,8 @@ public: ContextPtr context, const DisksMap & map, bool attach = false, - bool custom_disk = false) const; + bool custom_disk = false, + const std::unordered_set & skip_types = {}) const; private: using DiskTypeRegistry = std::unordered_map; diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index a9260a249dd..f45d12618bf 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -7,7 +7,6 @@ #include #include -#include namespace DB { @@ -27,7 +26,8 @@ void DiskSelector::assertInitialized() const } -void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) +void DiskSelector::initialize( + const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); @@ -36,6 +36,8 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, constexpr auto default_disk_name = "default"; bool has_default_disk = false; + constexpr auto local_disk_name = "local"; + bool has_local_disk = false; for (const auto & disk_name : keys) { if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII)) @@ -44,21 +46,31 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, if (disk_name == default_disk_name) has_default_disk = true; + if (disk_name == local_disk_name) + has_local_disk = true; + const auto disk_config_prefix = config_prefix + "." + disk_name; if (disk_validator && !disk_validator(config, disk_config_prefix, disk_name)) continue; - - disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); + auto created_disk + = factory.create(disk_name, config, disk_config_prefix, context, disks, /*attach*/ false, /*custom_disk*/ false, skip_types); + if (created_disk.get()) + { + disks.emplace(disk_name, std::move(created_disk)); + } } if (!has_default_disk) { disks.emplace( - default_disk_name, - std::make_shared( - default_disk_name, context->getPath(), 0, context, config, config_prefix)); + default_disk_name, std::make_shared(default_disk_name, context->getPath(), 0, context, config, config_prefix)); } + if (!has_local_disk && (context->getApplicationType() == Context::ApplicationType::DISKS)) + { + throw_away_local_on_update = true; + disks.emplace(local_disk_name, std::make_shared(local_disk_name, "/", 0, context, config, config_prefix)); + } is_initialized = true; } @@ -76,6 +88,7 @@ DiskSelectorPtr DiskSelector::updateFromConfig( std::shared_ptr result = std::make_shared(*this); constexpr auto default_disk_name = "default"; + constexpr auto local_disk_name = "local"; DisksMap old_disks_minus_new_disks(result->getDisksMap()); for (const auto & disk_name : keys) @@ -86,7 +99,12 @@ DiskSelectorPtr DiskSelector::updateFromConfig( auto disk_config_prefix = config_prefix + "." + disk_name; if (!result->getDisksMap().contains(disk_name)) { - result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context, result->getDisksMap())); + auto created_disk = factory.create( + disk_name, config, disk_config_prefix, context, result->getDisksMap(), /*attach*/ false, /*custom_disk*/ false, skip_types); + if (created_disk) + { + result->addToDiskMap(disk_name, created_disk); + } } else { @@ -99,6 +117,10 @@ DiskSelectorPtr DiskSelector::updateFromConfig( } old_disks_minus_new_disks.erase(default_disk_name); + if (throw_away_local_on_update) + { + old_disks_minus_new_disks.erase(local_disk_name); + } if (!old_disks_minus_new_disks.empty()) { diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 6669b428158..49a1be5cf50 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -20,7 +20,7 @@ class DiskSelector public: static constexpr auto TMP_INTERNAL_DISK_PREFIX = "__tmp_internal_"; - DiskSelector() = default; + explicit DiskSelector(std::unordered_set skip_types_ = {}) : skip_types(skip_types_) { } DiskSelector(const DiskSelector & from) = default; using DiskValidator = std::function; @@ -48,6 +48,10 @@ private: bool is_initialized = false; void assertInitialized() const; + + const std::unordered_set skip_types; + + bool throw_away_local_on_update = false; }; } diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 36d16d8d154..58407a810c5 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -439,13 +439,15 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo case FormatSettings::EscapingRule::CSV: result += fmt::format( ", use_best_effort_in_schema_inference={}, bool_true_representation={}, bool_false_representation={}," - " null_representation={}, delimiter={}, tuple_delimiter={}", + " null_representation={}, delimiter={}, tuple_delimiter={}, try_infer_numbers_from_strings={}, try_infer_strings_from_quoted_tuples={}", settings.csv.use_best_effort_in_schema_inference, settings.bool_true_representation, settings.bool_false_representation, settings.csv.null_representation, settings.csv.delimiter, - settings.csv.tuple_delimiter); + settings.csv.tuple_delimiter, + settings.csv.try_infer_numbers_from_strings, + settings.csv.try_infer_strings_from_quoted_tuples); break; case FormatSettings::EscapingRule::JSON: result += fmt::format( diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 79c2e6b4890..3d92023318e 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -243,6 +243,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride; format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder; format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down; + format_settings.orc.read_use_writer_time_zone = settings.input_format_orc_read_use_writer_time_zone; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference; @@ -269,9 +270,13 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.markdown.escape_special_characters = settings.output_format_markdown_escape_special_characters; format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference; - format_settings.max_binary_string_size = settings.format_binary_max_string_size; - format_settings.max_binary_array_size = settings.format_binary_max_array_size; + format_settings.binary.max_binary_string_size = settings.format_binary_max_string_size; + format_settings.binary.max_binary_array_size = settings.format_binary_max_array_size; + format_settings.binary.encode_types_in_binary_format = settings.output_format_binary_encode_types_in_binary_format; + format_settings.binary.decode_types_in_binary_format = settings.input_format_binary_decode_types_in_binary_format; format_settings.native.allow_types_conversion = settings.input_format_native_allow_types_conversion; + format_settings.native.encode_types_in_binary_format = settings.output_format_native_encode_types_in_binary_format; + format_settings.native.decode_types_in_binary_format = settings.input_format_native_decode_types_in_binary_format; format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth; format_settings.client_protocol_version = context->getClientProtocolVersion(); format_settings.date_time_overflow_behavior = settings.date_time_overflow_behavior; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 8ac783a1d86..8b38ffba524 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -106,8 +106,6 @@ struct FormatSettings UInt64 input_allow_errors_num = 0; Float32 input_allow_errors_ratio = 0; - UInt64 max_binary_string_size = 1_GiB; - UInt64 max_binary_array_size = 1_GiB; UInt64 client_protocol_version = 0; UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH; @@ -121,6 +119,14 @@ struct FormatSettings ZSTD }; + struct + { + UInt64 max_binary_string_size = 1_GiB; + UInt64 max_binary_array_size = 1_GiB; + bool encode_types_in_binary_format = false; + bool decode_types_in_binary_format = false; + } binary{}; + struct { UInt64 row_group_size = 1000000; @@ -403,6 +409,7 @@ struct FormatSettings bool use_fast_decoder = true; bool filter_push_down = true; UInt64 output_row_index_stride = 10'000; + bool read_use_writer_time_zone = false; } orc{}; /// For capnProto format we should determine how to @@ -458,6 +465,8 @@ struct FormatSettings struct { bool allow_types_conversion = true; + bool encode_types_in_binary_format = false; + bool decode_types_in_binary_format = false; } native{}; struct diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp new file mode 100644 index 00000000000..242d2dc9f80 --- /dev/null +++ b/src/Formats/JSONExtractTree.cpp @@ -0,0 +1,1660 @@ +#include +#include + +#include +#if USE_SIMDJSON +#include +#endif +#if USE_RAPIDJSON +#include +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +template +void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings) +{ + if (element.isInt64()) + { + writeIntText(element.getInt64(), buf); + return; + } + if (element.isUInt64()) + { + writeIntText(element.getUInt64(), buf); + return; + } + if (element.isDouble()) + { + writeFloatText(element.getDouble(), buf); + return; + } + if (element.isBool()) + { + if (element.getBool()) + writeCString("true", buf); + else + writeCString("false", buf); + return; + } + if (element.isString()) + { + writeJSONString(element.getString(), buf, format_settings); + return; + } + if (element.isArray()) + { + writeChar('[', buf); + bool need_comma = false; + for (auto value : element.getArray()) + { + if (std::exchange(need_comma, true)) + writeChar(',', buf); + jsonElementToString(value, buf, format_settings); + } + writeChar(']', buf); + return; + } + if (element.isObject()) + { + writeChar('{', buf); + bool need_comma = false; + for (auto [key, value] : element.getObject()) + { + if (std::exchange(need_comma, true)) + writeChar(',', buf); + writeJSONString(key, buf, format_settings); + writeChar(':', buf); + jsonElementToString(value, buf, format_settings); + } + writeChar('}', buf); + return; + } + if (element.isNull()) + { + writeCString("null", buf); + return; + } +} + +template +bool tryGetNumericValueFromJSONElement( + NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error) +{ + switch (element.type()) + { + case ElementType::DOUBLE: + if constexpr (std::is_floating_point_v) + { + /// We permit inaccurate conversion of double to float. + /// Example: double 0.1 from JSON is not representable in float. + /// But it will be more convenient for user to perform conversion. + value = static_cast(element.getDouble()); + } + else if (!accurate::convertNumeric(element.getDouble(), value)) + { + error = fmt::format("cannot convert double value {} to {}", element.getDouble(), TypeName); + return false; + } + break; + case ElementType::UINT64: + if (!accurate::convertNumeric(element.getUInt64(), value)) + { + error = fmt::format("cannot convert UInt64 value {} to {}", element.getUInt64(), TypeName); + return false; + } + break; + case ElementType::INT64: + if (!accurate::convertNumeric(element.getInt64(), value)) + { + error = fmt::format("cannot convert Int64 value {} to {}", element.getInt64(), TypeName); + return false; + } + break; + case ElementType::BOOL: + if constexpr (is_integer) + { + if (convert_bool_to_integer) + { + value = static_cast(element.getBool()); + break; + } + } + error = fmt::format("cannot convert bool value to {}", TypeName); + return false; + case ElementType::STRING: { + auto rb = ReadBufferFromMemory{element.getString()}; + if constexpr (std::is_floating_point_v) + { + if (!tryReadFloatText(value, rb) || !rb.eof()) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + } + else + { + if (tryReadIntText(value, rb) && rb.eof()) + break; + + /// Try to parse float and convert it to integer. + Float64 tmp_float; + rb.position() = rb.buffer().begin(); + if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + + if (!accurate::convertNumeric(tmp_float, value)) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + } + break; + } + default: + return false; + } + + return true; +} + +namespace +{ + +template +String jsonElementToString(const typename JSONParser::Element & element, const FormatSettings & format_settings) +{ + WriteBufferFromOwnString buf; + jsonElementToString(element, buf, format_settings); + return buf.str(); +} + +template +class NumericNode : public JSONExtractTreeNode +{ +public: + explicit NumericNode(bool is_bool_type_ = false) : is_bool_type(is_bool_type_) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = fmt::format("cannot parse {} value from null", TypeName); + return false; + } + + NumberType value; + if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || is_bool_type, error)) + { + if (error.empty()) + error = fmt::format("cannot read {} value from JSON element: {}", TypeName, jsonElementToString(element, format_settings)); + return false; + } + + if (is_bool_type) + value = static_cast(value); + + auto & col_vec = assert_cast &>(column); + col_vec.insertValue(value); + return true; + } + +protected: + bool is_bool_type; +}; + +template +class LowCardinalityNumericNode : public NumericNode +{ +public: + explicit LowCardinalityNumericNode(bool is_nullable_, bool is_bool_type_ = false) + : NumericNode(is_bool_type_), is_nullable(is_nullable_) + { + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = fmt::format("cannot parse {} value from null", TypeName); + return false; + } + + NumberType value; + if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || this->is_bool_type, error)) + { + if (error.empty()) + error = fmt::format("cannot read {} value from JSON element: {}", TypeName, jsonElementToString(element, format_settings)); + return false; + } + + if (this->is_bool_type) + value = static_cast(value); + + auto & col_lc = assert_cast(column); + col_lc.insertData(reinterpret_cast(&value), sizeof(value)); + return true; + } + +private: + bool is_nullable; +}; + +template +class StringNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + error = "cannot parse String value from null"; + return false; + } + + if (!element.isString()) + { + auto & col_str = assert_cast(column); + auto & chars = col_str.getChars(); + WriteBufferFromVector buf(chars, AppendModeTag()); + jsonElementToString(element, buf, format_settings); + buf.finalize(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + } + else + { + auto value = element.getString(); + auto & col_str = assert_cast(column); + col_str.insertData(value.data(), value.size()); + } + return true; + } +}; + +template +class LowCardinalityStringNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityStringNode(bool is_nullable_) : is_nullable(is_nullable_) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot parse String value from null"; + return false; + } + + if (!element.isString()) + { + auto value = jsonElementToString(element, format_settings); + assert_cast(column).insertData(value.data(), value.size()); + } + else + { + auto value = element.getString(); + assert_cast(column).insertData(value.data(), value.size()); + } + + return true; + } + +private: + bool is_nullable; +}; + +template +class FixedStringNode : public JSONExtractTreeNode +{ +public: + explicit FixedStringNode(size_t fixed_length_) : fixed_length(fixed_length_) { } + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot parse FixedString value from null"; + return false; + } + + if (!element.isString()) + return checkValueSizeAndInsert(column, jsonElementToString(element, format_settings), error); + return checkValueSizeAndInsert(column, element.getString(), error); + } + +private: + template + bool checkValueSizeAndInsert(IColumn & column, const T & value, String & error) const + { + if (value.size() > fixed_length) + { + error = fmt::format("too large string for FixedString({}): {}", fixed_length, value); + return false; + } + assert_cast(column).insertData(value.data(), value.size()); + return true; + } + + size_t fixed_length; +}; + +template +class LowCardinalityFixedStringNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityFixedStringNode(bool is_nullable_, size_t fixed_length_) : is_nullable(is_nullable_), fixed_length(fixed_length_) + { + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + error = "cannot parse FixedString value from null"; + return false; + } + + if (!element.isString()) + return checkValueSizeAndInsert(column, jsonElementToString(element, format_settings), error); + return checkValueSizeAndInsert(column, element.getString(), error); + } + +private: + template + bool checkValueSizeAndInsert(IColumn & column, const T & value, String & error) const + { + if (value.size() > fixed_length) + { + error = fmt::format("too large string for FixedString({}): {}", fixed_length, value); + return false; + } + + // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. + // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) + // the data is padded here and written directly to the Low Cardinality Column + if (value.size() == fixed_length) + { + assert_cast(column).insertData(value.data(), value.size()); + } + else + { + String padded_value(value); + padded_value.resize(fixed_length, '\0'); + assert_cast(column).insertData(padded_value.data(), padded_value.size()); + } + return true; + } + + bool is_nullable; + size_t fixed_length; +}; + +template +class UUIDNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read UUID value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + UUID uuid; + if (!tryParse(uuid, data)) + { + error = fmt::format("cannot parse UUID value here: {}", data); + return false; + } + + assert_cast(column).insert(uuid); + return true; + } + + + static bool tryParse(UUID & uuid, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadUUIDText(uuid, buf) && buf.eof(); + } +}; + +template +class LowCardinalityUUIDNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityUUIDNode(bool is_nullable_) : is_nullable(is_nullable_) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && (is_nullable || format_settings.null_as_default)) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read UUID value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + ReadBufferFromMemory buf(data.data(), data.size()); + UUID uuid; + if (!tryReadUUIDText(uuid, buf) || !buf.eof()) + { + error = fmt::format("cannot parse UUID value here: {}", data); + return false; + } + assert_cast(column).insertData(reinterpret_cast(&uuid), sizeof(uuid)); + return true; + } + +private: + bool is_nullable; +}; + +template +class DateNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read Date value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + ReadBufferFromMemory buf(data.data(), data.size()); + DateType date; + if (!tryReadDateText(date, buf) || !buf.eof()) + { + error = fmt::format("cannot parse Date value here: {}", data); + return false; + } + + assert_cast &>(column).insertValue(date); + return true; + } +}; + +template +class DateTimeNode : public JSONExtractTreeNode, public TimezoneMixin +{ +public: + explicit DateTimeNode(const DataTypeDateTime & datetime_type) : TimezoneMixin(datetime_type) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + time_t value; + if (element.isString()) + { + if (!tryParse(value, element.getString(), format_settings.date_time_input_format)) + { + error = fmt::format("cannot parse DateTime value here: {}", element.getString()); + return false; + } + } + else if (element.isUInt64()) + { + value = element.getUInt64(); + } + else + { + error = fmt::format("cannot read DateTime value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + bool tryParse(time_t & value, std::string_view data, FormatSettings::DateTimeInputFormat date_time_input_format) const + { + ReadBufferFromMemory buf(data.data(), data.size()); + switch (date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTimeText(value, buf, time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTimeBestEffort(value, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTimeBestEffortUS(value, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + } + + return false; + } +}; + +template +class DecimalNode : public JSONExtractTreeNode +{ +public: + explicit DecimalNode(const DataTypePtr & type) : scale(assert_cast &>(*type).getScale()) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + DecimalType value{}; + + switch (element.type()) + { + case ElementType::DOUBLE: + value = convertToDecimal, DataTypeDecimal>(element.getDouble(), scale); + break; + case ElementType::UINT64: + value = convertToDecimal, DataTypeDecimal>(element.getUInt64(), scale); + break; + case ElementType::INT64: + value = convertToDecimal, DataTypeDecimal>(element.getInt64(), scale); + break; + case ElementType::STRING: { + auto rb = ReadBufferFromMemory{element.getString()}; + if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, scale)) + { + error = fmt::format("cannot parse Decimal value here: {}", element.getString()); + return false; + } + break; + } + case ElementType::NULL_VALUE: { + if (!format_settings.null_as_default) + { + error = "cannot convert null to Decimal value"; + return false; + } + break; + } + default: + { + error = fmt::format("cannot read Decimal value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + } + + assert_cast &>(column).insertValue(value); + return true; + } + +private: + UInt32 scale; +}; + + +template +class DateTime64Node : public JSONExtractTreeNode, public TimezoneMixin +{ +public: + explicit DateTime64Node(const DataTypeDateTime64 & datetime64_type) : TimezoneMixin(datetime64_type), scale(datetime64_type.getScale()) + { + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + DateTime64 value; + if (element.isString()) + { + if (!tryParse(value, element.getString(), format_settings.date_time_input_format)) + { + error = fmt::format("cannot parse DateTime64 value here: {}", element.getString()); + return false; + } + } + else + { + switch (element.type()) + { + case ElementType::DOUBLE: + value = convertToDecimal, DataTypeDecimal>(element.getDouble(), scale); + break; + case ElementType::UINT64: + value = convertToDecimal, DataTypeDecimal>(element.getUInt64(), scale); + break; + case ElementType::INT64: + value = convertToDecimal, DataTypeDecimal>(element.getInt64(), scale); + break; + default: + error = fmt::format("cannot read DateTime64 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + } + + assert_cast(column).insert(value); + return true; + } + + bool tryParse(DateTime64 & value, std::string_view data, FormatSettings::DateTimeInputFormat date_time_input_format) const + { + ReadBufferFromMemory buf(data.data(), data.size()); + switch (date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTime64Text(value, scale, buf, time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTime64BestEffort(value, scale, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTime64BestEffortUS(value, scale, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + } + + return false; + } + +private: + UInt32 scale; +}; + +template +class EnumNode : public JSONExtractTreeNode +{ +public: + explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) + { + for (const auto & name_value_pair : name_value_pairs) + { + name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); + only_values.emplace(name_value_pair.second); + } + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot convert null to Enum value"; + return false; + } + + auto & col_vec = assert_cast &>(column); + + if (element.isInt64()) + { + Type value; + if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getInt64()); + return false; + } + col_vec.insertValue(value); + return true; + } + + if (element.isUInt64()) + { + Type value; + if (!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getUInt64()); + return false; + } + col_vec.insertValue(value); + return true; + } + + if (element.isString()) + { + auto value = name_to_value_map.find(element.getString()); + if (value == name_to_value_map.end()) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getString()); + return false; + } + col_vec.insertValue(value->second); + return true; + } + + error = fmt::format("cannot read Enum value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector> name_value_pairs; + std::unordered_map name_to_value_map; + std::unordered_set only_values; +}; + +template +class IPv4Node : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read IPv4 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + IPv4 value; + if (!tryParse(value, data)) + { + error = fmt::format("cannot parse IPv4 value here: {}", data); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + static bool tryParse(IPv4 & value, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadIPv4Text(value, buf) && buf.eof(); + } +}; + +template +class IPv6Node : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read IPv6 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + IPv6 value; + if (!tryParse(value, data)) + { + error = fmt::format("cannot parse IPv6 value here: {}", data); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + + static bool tryParse(IPv6 & value, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadIPv6Text(value, buf) && buf.eof(); + } +}; + +template +class NullableNode : public JSONExtractTreeNode +{ +public: + explicit NullableNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + column.insertDefault(); + return true; + } + + auto & col_null = assert_cast(column); + if (!nested->insertResultToColumn(col_null.getNestedColumn(), element, insert_settings, format_settings, error)) + return false; + col_null.getNullMapColumn().insertValue(0); + return true; + } + +private: + std::unique_ptr> nested; +}; + +template +class LowCardinalityNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityNode(bool is_nullable_, std::unique_ptr> nested_) + : is_nullable(is_nullable_), nested(std::move(nested_)) + { + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && (is_nullable || format_settings.null_as_default)) + { + column.insertDefault(); + return true; + } + + auto & col_lc = assert_cast(column); + auto tmp_nested = col_lc.getDictionary().getNestedColumn()->cloneEmpty(); + if (!nested->insertResultToColumn(*tmp_nested, element, insert_settings, format_settings, error)) + return false; + + col_lc.insertFromFullColumn(*tmp_nested, 0); + return true; + } + +private: + bool is_nullable; + std::unique_ptr> nested; +}; + +template +class ArrayNode : public JSONExtractTreeNode +{ +public: + explicit ArrayNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isArray()) + { + error = fmt::format("cannot read Array value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto array = element.getArray(); + + auto & col_arr = assert_cast(column); + auto & data = col_arr.getData(); + size_t old_size = data.size(); + bool were_valid_elements = false; + + for (auto value : array) + { + if (nested->insertResultToColumn(data, value, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + data.insertDefault(); + } + else + { + data.popBack(data.size() - old_size); + return false; + } + } + + if (!were_valid_elements) + { + data.popBack(data.size() - old_size); + return false; + } + + col_arr.getOffsets().push_back(data.size()); + return true; + } + +private: + std::unique_ptr> nested; +}; + +template +class TupleNode : public JSONExtractTreeNode +{ +public: + TupleNode(std::vector>> nested_, const std::vector & explicit_names_) + : nested(std::move(nested_)), explicit_names(explicit_names_) + { + for (size_t i = 0; i != explicit_names.size(); ++i) + name_to_index_map.emplace(explicit_names[i], i); + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + auto & tuple = assert_cast(column); + size_t old_size = column.size(); + bool were_valid_elements = false; + + auto set_size = [&](size_t size) + { + for (size_t i = 0; i != tuple.tupleSize(); ++i) + { + auto & col = tuple.getColumn(i); + if (col.size() != size) + { + if (col.size() > size) + col.popBack(col.size() - size); + else + while (col.size() < size) + col.insertDefault(); + } + } + }; + + if (element.isArray()) + { + auto array = element.getArray(); + auto it = array.begin(); + + for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) + { + if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + tuple.getColumn(index).insertDefault(); + } + else + { + set_size(old_size); + error += fmt::format("(during reading tuple {} element)", index); + return false; + } + } + + set_size(old_size + static_cast(were_valid_elements)); + return were_valid_elements; + } + + if (element.isObject()) + { + auto object = element.getObject(); + if (name_to_index_map.empty()) + { + auto it = object.begin(); + for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) + { + if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + tuple.getColumn(index).insertDefault(); + } + else + { + set_size(old_size); + error += fmt::format("(during reading tuple {} element)", index); + return false; + } + } + } + else + { + for (const auto & [key, value] : object) + { + auto index = name_to_index_map.find(key); + if (index != name_to_index_map.end()) + { + if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (!insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + set_size(old_size); + error += fmt::format("(during reading tuple element \"{}\")", key); + return false; + } + } + } + } + + set_size(old_size + static_cast(were_valid_elements)); + return were_valid_elements; + } + + error = fmt::format("cannot read Tuple value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector>> nested; + std::vector explicit_names; + std::unordered_map name_to_index_map; +}; + +template +class MapNode : public JSONExtractTreeNode +{ +public: + explicit MapNode(std::unique_ptr> value_) : value(std::move(value_)) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (!element.isObject()) + { + error = fmt::format("cannot read Map value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto & map_col = assert_cast(column); + auto & offsets = map_col.getNestedColumn().getOffsets(); + auto & tuple_col = map_col.getNestedData(); + auto & key_col = tuple_col.getColumn(0); + auto & value_col = tuple_col.getColumn(1); + size_t old_size = tuple_col.size(); + + auto object = element.getObject(); + auto it = object.begin(); + for (; it != object.end(); ++it) + { + auto pair = *it; + + /// Insert key + key_col.insertData(pair.first.data(), pair.first.size()); + + /// Insert value + if (!value->insertResultToColumn(value_col, pair.second, insert_settings, format_settings, error)) + { + if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + value_col.insertDefault(); + } + else + { + key_col.popBack(key_col.size() - offsets.back()); + value_col.popBack(value_col.size() - offsets.back()); + error += fmt::format("(during reading value of key \"{}\")", pair.first); + return false; + } + } + } + + offsets.push_back(old_size + object.size()); + return true; + } + +private: + std::unique_ptr> value; +}; + +template +class VariantNode : public JSONExtractTreeNode +{ +public: + VariantNode(std::vector>> variant_nodes_, std::vector order_) + : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) + { + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + auto & column_variant = assert_cast(column); + for (size_t i : order) + { + auto & variant = column_variant.getVariantByGlobalDiscriminator(i); + if (variant_nodes[i]->insertResultToColumn(variant, element, insert_settings, format_settings, error)) + { + column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); + column_variant.getOffsets().push_back(variant.size() - 1); + return true; + } + } + + error = fmt::format("cannot read Map value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector>> variant_nodes; + /// Order in which we should try variants nodes. + /// For example, String should be always the last one. + std::vector order; +}; + + +template +class DynamicNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + auto & column_dynamic = assert_cast(column); + /// First, check if element is NULL. + if (element.isNull()) + { + column_dynamic.insertDefault(); + return true; + } + + auto & variant_column = column_dynamic.getVariantColumn(); + auto variant_info = column_dynamic.getVariantInfo(); + /// Second, infer ClickHouse type for this element and add it as a new variant. + auto element_type = elementToDataType(element, format_settings); + if (column_dynamic.addNewVariant(element_type)) + { + auto node = buildJSONExtractTree(element_type, "Dynamic inference"); + auto global_discriminator = variant_info.variant_name_to_discriminator[element_type->getName()]; + auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discriminator); + if (!node->insertResultToColumn(variant, element, insert_settings, format_settings, error)) + return false; + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discriminator)); + variant_column.getOffsets().push_back(variant.size() - 1); + return true; + } + + /// We couldn't add new variant. Try to insert element into current variants. + auto variant_node = buildJSONExtractTree(variant_info.variant_type, "Dynamic inference"); + if (variant_node->insertResultToColumn(variant_column, element, insert_settings, format_settings, error)) + return true; + + /// We couldn't insert element into any existing variant, add String variant and read value as String. + column_dynamic.addStringVariant(); + auto string_global_discriminator = variant_info.variant_name_to_discriminator["String"]; + auto & string_column = variant_column.getVariantByGlobalDiscriminator(string_global_discriminator); + if (!getStringNode()->insertResultToColumn(string_column, element, insert_settings, format_settings, error)) + return false; + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(string_global_discriminator)); + variant_column.getOffsets().push_back(string_column.size() - 1); + return true; + } + + static const std::unique_ptr> & getStringNode() + { + static const std::unique_ptr> string_node + = buildJSONExtractTree(std::make_shared(), "Dynamic inference"); + return string_node; + } + + static DataTypePtr elementToDataType(const typename JSONParser::Element & element, const FormatSettings & format_settings) + { + JSONInferenceInfo json_inference_info; + auto type = elementToDataTypeImpl(element, format_settings, json_inference_info); + transformFinalInferredJSONTypeIfNeeded(type, format_settings, &json_inference_info); + return type; + } + +private: + static DataTypePtr elementToDataTypeImpl(const typename JSONParser::Element & element, const FormatSettings & format_settings, JSONInferenceInfo & json_inference_info) + { + switch (element.type()) + { + case ElementType::NULL_VALUE: + return makeNullable(std::make_shared()); + case ElementType::BOOL: + return DataTypeFactory::instance().get("Bool"); + case ElementType::INT64: + { + auto type = std::make_shared(); + if (element.getInt64() < 0) + json_inference_info.negative_integers.insert(type.get()); + return type; + } + case ElementType::UINT64: + return std::make_shared(); + case ElementType::DOUBLE: + return std::make_shared(); + case ElementType::STRING: + { + auto data = element.getString(); + + if (auto type = tryInferDateOrDateTimeFromString(data, format_settings)) + return type; + + if (format_settings.json.try_infer_numbers_from_strings) + { + if (auto type = tryInferJSONNumberFromString(data, format_settings, &json_inference_info)) + { + json_inference_info.numbers_parsed_from_json_strings.insert(type.get()); + return type; + } + } + + return std::make_shared(); + } + case ElementType::ARRAY: + { + auto array = element.getArray(); + DataTypes types; + types.reserve(array.size()); + for (auto value : array) + types.push_back(makeNullableSafe(elementToDataTypeImpl(value, format_settings, json_inference_info))); + + if (types.empty()) + return std::make_shared(makeNullable(std::make_shared())); + + if (checkIfTypesAreEqual(types)) + return std::make_shared(types.back()); + + /// For JSON if we have not complete types, we should not try to transform them + /// and return it as a Tuple. + /// For example, if we have types [Nullable(Float64), Nullable(Nothing), Nullable(Float64)] + /// it can be Array(Nullable(Float64)) or Tuple(Nullable(Float64), , Nullable(Float64)) and + /// we can't determine which one it is right now. But we will be able to do it later + /// when we will have the final top level type. + /// For example, we can have JSON element [[42.42, null, 43.43], [44.44, "Some string", 45.45]] and we should + /// determine the type for this element as Tuple(Nullable(Float64), Nullable(String), Nullable(Float64)). + for (const auto & type : types) + { + if (!checkIfTypeIsComplete(type)) + return std::make_shared(types); + } + + auto types_copy = types; + transformInferredJSONTypesIfNeeded(types_copy, format_settings, &json_inference_info); + + if (checkIfTypesAreEqual(types_copy)) + return std::make_shared(types_copy.back()); + + return std::make_shared(types); + } + case ElementType::OBJECT: { + /// TODO: Use new JSON type here when it's ready. + return std::make_shared(std::make_shared(), makeNullable(std::make_shared())); + } + } + } +}; + +} + +template +std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message) +{ + switch (type->getTypeId()) + { + case TypeIndex::UInt8: + return std::make_unique>(isBool(type)); + case TypeIndex::UInt16: + return std::make_unique>(); + case TypeIndex::UInt32: + return std::make_unique>(); + case TypeIndex::UInt64: + return std::make_unique>(); + case TypeIndex::UInt128: + return std::make_unique>(); + case TypeIndex::UInt256: + return std::make_unique>(); + case TypeIndex::Int8: + return std::make_unique>(); + case TypeIndex::Int16: + return std::make_unique>(); + case TypeIndex::Int32: + return std::make_unique>(); + case TypeIndex::Int64: + return std::make_unique>(); + case TypeIndex::Int128: + return std::make_unique>(); + case TypeIndex::Int256: + return std::make_unique>(); + case TypeIndex::Float32: + return std::make_unique>(); + case TypeIndex::Float64: + return std::make_unique>(); + case TypeIndex::String: + return std::make_unique>(); + case TypeIndex::FixedString: + return std::make_unique>(assert_cast(*type).getN()); + case TypeIndex::UUID: + return std::make_unique>(); + case TypeIndex::IPv4: + return std::make_unique>(); + case TypeIndex::IPv6: + return std::make_unique>(); + case TypeIndex::Date:; + return std::make_unique>(); + case TypeIndex::Date32: + return std::make_unique>(); + case TypeIndex::DateTime: + return std::make_unique>(assert_cast(*type)); + case TypeIndex::DateTime64: + return std::make_unique>(assert_cast(*type)); + case TypeIndex::Decimal32: + return std::make_unique>(type); + case TypeIndex::Decimal64: + return std::make_unique>(type); + case TypeIndex::Decimal128: + return std::make_unique>(type); + case TypeIndex::Decimal256: + return std::make_unique>(type); + case TypeIndex::Enum8: + return std::make_unique>(assert_cast(*type).getValues()); + case TypeIndex::Enum16: + return std::make_unique>(assert_cast(*type).getValues()); + case TypeIndex::LowCardinality: + { + /// To optimize inserting into LowCardinality we have special nodes for LowCardinality of numeric and string types. + const auto & lc_type = assert_cast(*type); + auto dictionary_type = removeNullable(lc_type.getDictionaryType()); + bool is_nullable = lc_type.isLowCardinalityNullable(); + + switch (dictionary_type->getTypeId()) + { + case TypeIndex::UInt8: + return std::make_unique>(is_nullable, isBool(type)); + case TypeIndex::UInt16: + return std::make_unique>(is_nullable); + case TypeIndex::UInt32: + return std::make_unique>(is_nullable); + case TypeIndex::UInt64: + return std::make_unique>(is_nullable); + case TypeIndex::Int8: + return std::make_unique>(is_nullable); + case TypeIndex::Int16: + return std::make_unique>(is_nullable); + case TypeIndex::Int32: + return std::make_unique>(is_nullable); + case TypeIndex::Int64: + return std::make_unique>(is_nullable); + case TypeIndex::Float32: + return std::make_unique>(is_nullable); + case TypeIndex::Float64: + return std::make_unique>(is_nullable); + case TypeIndex::String: + return std::make_unique>(is_nullable); + case TypeIndex::FixedString: + return std::make_unique>(is_nullable, assert_cast(*dictionary_type).getN()); + case TypeIndex::UUID: + return std::make_unique>(is_nullable); + default: + return std::make_unique>(is_nullable, buildJSONExtractTree(dictionary_type, source_for_exception_message)); + } + } + case TypeIndex::Nullable: + return std::make_unique>(buildJSONExtractTree(assert_cast(*type).getNestedType(), source_for_exception_message)); + case TypeIndex::Array: + return std::make_unique>(buildJSONExtractTree(assert_cast(*type).getNestedType(), source_for_exception_message)); + case TypeIndex::Tuple: + { + const auto & tuple = assert_cast(*type); + const auto & tuple_elements = tuple.getElements(); + std::vector>> elements; + elements.reserve(tuple_elements.size()); + for (const auto & tuple_element : tuple_elements) + elements.emplace_back(buildJSONExtractTree(tuple_element, source_for_exception_message)); + return std::make_unique>(std::move(elements), tuple.haveExplicitNames() ? tuple.getElementNames() : Strings{}); + } + case TypeIndex::Map: + { + const auto & map_type = assert_cast(*type); + const auto & key_type = map_type.getKeyType(); + if (!isString(removeLowCardinality(key_type))) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "{} doesn't support the return type schema: {} with key type not String", + source_for_exception_message, + type->getName()); + + const auto & value_type = map_type.getValueType(); + return std::make_unique>(buildJSONExtractTree(value_type, source_for_exception_message)); + } + case TypeIndex::Variant: + { + const auto & variant_type = assert_cast(*type); + const auto & variants = variant_type.getVariants(); + std::vector>> variant_nodes; + variant_nodes.reserve(variants.size()); + for (const auto & variant : variants) + variant_nodes.push_back(buildJSONExtractTree(variant, source_for_exception_message)); + return std::make_unique>(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); + } + case TypeIndex::Dynamic: + return std::make_unique>(); + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "{} doesn't support the return type schema: {}", + source_for_exception_message, + type->getName()); + } +} + +#if USE_SIMDJSON +template void jsonElementToString(const SimdJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +#endif + +#if USE_RAPIDJSON +template void jsonElementToString(const RapidJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +template bool tryGetNumericValueFromJSONElement(Float64 & value, const RapidJSONParser::Element & element, bool convert_bool_to_integer, String & error); +#else +template void jsonElementToString(const DummyJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +#endif + +} diff --git a/src/Formats/JSONExtractTree.h b/src/Formats/JSONExtractTree.h new file mode 100644 index 00000000000..b5e82506548 --- /dev/null +++ b/src/Formats/JSONExtractTree.h @@ -0,0 +1,41 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ + +struct JSONExtractInsertSettings +{ + /// If false, JSON boolean values won't be inserted into columns with integer types + /// It's used in JSONExtractInt64/JSONExtractUInt64/... functions. + bool convert_bool_to_integer = true; + /// If true, when complex type like Array/Map has both valid and invalid elements, + /// the default value will be inserted on invalid elements. + /// For example, if we have [1, "hello", 2] and type Array(UInt32), + /// we will insert [1, 0, 2] in the column. Used in all JSONExtract functions. + bool insert_default_on_invalid_elements_in_complex_types = false; +}; + +template +class JSONExtractTreeNode +{ +public: + JSONExtractTreeNode() = default; + virtual ~JSONExtractTreeNode() = default; + virtual bool insertResultToColumn(IColumn &, const typename JSONParser::Element &, const JSONExtractInsertSettings & insert_setting, const FormatSettings & format_settings, String & error) const = 0; +}; + +/// Build a tree for insertion JSON element into a column with provided data type. +template +std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); + +template +void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); + +template +bool tryGetNumericValueFromJSONElement(NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error); + +} diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index fa5d41d6536..f9b56cf4f61 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -31,8 +32,8 @@ namespace ErrorCodes } -NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_) - : istr(istr_), server_revision(server_revision_) +NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_, std::optional format_settings_) + : istr(istr_), server_revision(server_revision_), format_settings(format_settings_) { } @@ -40,16 +41,12 @@ NativeReader::NativeReader( ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, - bool skip_unknown_columns_, - bool null_as_default_, - bool allow_types_conversion_, + std::optional format_settings_, BlockMissingValues * block_missing_values_) : istr(istr_) , header(header_) , server_revision(server_revision_) - , skip_unknown_columns(skip_unknown_columns_) - , null_as_default(null_as_default_) - , allow_types_conversion(allow_types_conversion_) + , format_settings(std::move(format_settings_)) , block_missing_values(block_missing_values_) { } @@ -83,13 +80,14 @@ void NativeReader::resetParser() use_index = false; } -void NativeReader::readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint) +static void readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, const std::optional & format_settings, size_t rows, double avg_value_size_hint) { ISerialization::DeserializeBinaryBulkSettings settings; settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; settings.avg_value_size_hint = avg_value_size_hint; settings.position_independent_encoding = false; settings.native_format = true; + settings.data_types_binary_encoding = format_settings && format_settings->native.decode_types_in_binary_format; ISerialization::DeserializeBinaryBulkStatePtr state; @@ -167,8 +165,16 @@ Block NativeReader::read() /// Type String type_name; - readBinary(type_name, istr); - column.type = data_type_factory.get(type_name); + if (format_settings && format_settings->native.decode_types_in_binary_format) + { + column.type = decodeDataType(istr); + type_name = column.type->getName(); + } + else + { + readBinary(type_name, istr); + column.type = data_type_factory.get(type_name); + } setVersionToAggregateFunctions(column.type, true, server_revision); @@ -203,7 +209,7 @@ Block NativeReader::read() double avg_value_size_hint = avg_value_size_hints.empty() ? 0 : avg_value_size_hints[i]; if (rows) /// If no rows, nothing to read. - readData(*serialization, read_column, istr, rows, avg_value_size_hint); + readData(*serialization, read_column, istr, format_settings, rows, avg_value_size_hint); column.column = std::move(read_column); @@ -214,12 +220,12 @@ Block NativeReader::read() { auto & header_column = header.getByName(column.name); - if (null_as_default) + if (format_settings && format_settings->null_as_default) insertNullAsDefaultIfNeeded(column, header_column, header.getPositionByName(column.name), block_missing_values); if (!header_column.type->equals(*column.type)) { - if (allow_types_conversion) + if (format_settings && format_settings->native.allow_types_conversion) { try { @@ -246,7 +252,7 @@ Block NativeReader::read() } else { - if (!skip_unknown_columns) + if (format_settings && !format_settings->skip_unknown_fields) throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown column with name {} found while reading data in Native format", column.name); use_in_result = false; } diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h index 3cec4afd997..97b6ea22b15 100644 --- a/src/Formats/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -20,7 +20,7 @@ class NativeReader { public: /// If a non-zero server_revision is specified, additional block information may be expected and read. - NativeReader(ReadBuffer & istr_, UInt64 server_revision_); + NativeReader(ReadBuffer & istr_, UInt64 server_revision_, std::optional format_settings_ = std::nullopt); /// For cases when data structure (header) is known in advance. /// NOTE We may use header for data validation and/or type conversions. It is not implemented. @@ -28,9 +28,7 @@ public: ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, - bool skip_unknown_columns_ = false, - bool null_as_default_ = false, - bool allow_types_conversion_ = false, + std::optional format_settings_ = std::nullopt, BlockMissingValues * block_missing_values_ = nullptr); /// For cases when we have an index. It allows to skip columns. Only columns specified in the index will be read. @@ -38,8 +36,6 @@ public: IndexForNativeFormat::Blocks::const_iterator index_block_it_, IndexForNativeFormat::Blocks::const_iterator index_block_end_); - static void readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint); - Block getHeader() const; void resetParser(); @@ -50,9 +46,7 @@ private: ReadBuffer & istr; Block header; UInt64 server_revision; - bool skip_unknown_columns = false; - bool null_as_default = false; - bool allow_types_conversion = false; + std::optional format_settings = std::nullopt; BlockMissingValues * block_missing_values = nullptr; bool use_index = false; diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index b150561a5fc..3c87e489b1c 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -25,10 +26,20 @@ namespace ErrorCodes NativeWriter::NativeWriter( - WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, bool remove_low_cardinality_, - IndexForNativeFormat * index_, size_t initial_size_of_file_) - : ostr(ostr_), client_revision(client_revision_), header(header_), - index(index_), initial_size_of_file(initial_size_of_file_), remove_low_cardinality(remove_low_cardinality_) + WriteBuffer & ostr_, + UInt64 client_revision_, + const Block & header_, + std::optional format_settings_, + bool remove_low_cardinality_, + IndexForNativeFormat * index_, + size_t initial_size_of_file_) + : ostr(ostr_) + , client_revision(client_revision_) + , header(header_) + , index(index_) + , initial_size_of_file(initial_size_of_file_) + , remove_low_cardinality(remove_low_cardinality_) + , format_settings(std::move(format_settings_)) { if (index) { @@ -45,7 +56,7 @@ void NativeWriter::flush() } -static void writeData(const ISerialization & serialization, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit) +static void writeData(const ISerialization & serialization, const ColumnPtr & column, WriteBuffer & ostr, const std::optional & format_settings, UInt64 offset, UInt64 limit) { /** If there are columns-constants - then we materialize them. * (Since the data type does not know how to serialize / deserialize constants.) @@ -57,6 +68,7 @@ static void writeData(const ISerialization & serialization, const ColumnPtr & co settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; }; settings.position_independent_encoding = false; settings.low_cardinality_max_dictionary_size = 0; + settings.data_types_binary_encoding = format_settings && format_settings->native.encode_types_in_binary_format; ISerialization::SerializeBinaryBulkStatePtr state; serialization.serializeBinaryBulkStatePrefix(*full_column, settings, state); @@ -121,15 +133,22 @@ size_t NativeWriter::write(const Block & block) setVersionToAggregateFunctions(column.type, include_version, include_version ? std::optional(client_revision) : std::nullopt); /// Type - String type_name = column.type->getName(); + if (format_settings && format_settings->native.encode_types_in_binary_format) + { + encodeDataType(column.type, ostr); + } + else + { + String type_name = column.type->getName(); - /// For compatibility, we will not send explicit timezone parameter in DateTime data type - /// to older clients, that cannot understand it. - if (client_revision < DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE - && startsWith(type_name, "DateTime(")) - type_name = "DateTime"; + /// For compatibility, we will not send explicit timezone parameter in DateTime data type + /// to older clients, that cannot understand it. + if (client_revision < DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE + && startsWith(type_name, "DateTime(")) + type_name = "DateTime"; - writeStringBinary(type_name, ostr); + writeStringBinary(type_name, ostr); + } /// Serialization. Dynamic, if client supports it. SerializationPtr serialization; @@ -161,7 +180,7 @@ size_t NativeWriter::write(const Block & block) /// Data if (rows) /// Zero items of data is always represented as zero number of bytes. - writeData(*serialization, column.column, ostr, 0, 0); + writeData(*serialization, column.column, ostr, format_settings, 0, 0); if (index) { diff --git a/src/Formats/NativeWriter.h b/src/Formats/NativeWriter.h index 7bb377d2e4a..b4903243d45 100644 --- a/src/Formats/NativeWriter.h +++ b/src/Formats/NativeWriter.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -23,7 +24,7 @@ public: /** If non-zero client_revision is specified, additional block information can be written. */ NativeWriter( - WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, bool remove_low_cardinality_ = false, + WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, std::optional format_settings_ = std::nullopt, bool remove_low_cardinality_ = false, IndexForNativeFormat * index_ = nullptr, size_t initial_size_of_file_ = 0); Block getHeader() const { return header; } @@ -44,6 +45,7 @@ private: CompressedWriteBuffer * ostr_concrete = nullptr; bool remove_low_cardinality; + std::optional format_settings; }; } diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 31faea2e13e..3c374ada9e6 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -225,19 +225,6 @@ namespace Paths paths; }; - bool checkIfTypesAreEqual(const DataTypes & types) - { - if (types.empty()) - return true; - - for (size_t i = 1; i < types.size(); ++i) - { - if (!types[0]->equals(*types[i])) - return false; - } - return true; - } - void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) { type_indexes.clear(); @@ -272,24 +259,31 @@ namespace type_indexes.erase(TypeIndex::Nothing); } - /// If we have both Int64 and UInt64, convert all Int64 to UInt64, + /// If we have both Int64 and UInt64, convert all not-negative Int64 to UInt64, /// because UInt64 is inferred only in case of Int64 overflow. - void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes) + void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64)) return; + bool have_negative_integers = false; for (auto & type : data_types) { if (WhichDataType(type).isInt64()) - type = std::make_shared(); + { + bool is_negative = json_info && json_info->negative_integers.contains(type.get()); + have_negative_integers |= is_negative; + if (!is_negative) + type = std::make_shared(); + } } - type_indexes.erase(TypeIndex::Int64); + if (!have_negative_integers) + type_indexes.erase(TypeIndex::Int64); } /// If we have both Int64 and Float64 types, convert all Int64 to Float64. - void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes) + void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { bool have_floats = type_indexes.contains(TypeIndex::Float64); bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); @@ -300,7 +294,12 @@ namespace { WhichDataType which(type); if (which.isInt64() || which.isUInt64()) - type = std::make_shared(); + { + auto new_type = std::make_shared(); + if (json_info && json_info->numbers_parsed_from_json_strings.erase(type.get())) + json_info->numbers_parsed_from_json_strings.insert(new_type.get()); + type = new_type; + } } type_indexes.erase(TypeIndex::Int64); @@ -635,9 +634,9 @@ namespace if (settings.try_infer_integers) { /// Transform Int64 to UInt64 if needed. - transformIntegers(data_types, type_indexes); + transformIntegers(data_types, type_indexes, json_info); /// Transform integers to floats if needed. - transformIntegersAndFloatsToFloats(data_types, type_indexes); + transformIntegersAndFloatsToFloats(data_types, type_indexes, json_info); } /// Transform Date to DateTime or both to String if needed. @@ -887,7 +886,7 @@ namespace } template - DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings) + DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info) { if (buf.eof()) return nullptr; @@ -911,7 +910,12 @@ namespace Int64 tmp_int; buf.position() = number_start; if (tryReadIntText(tmp_int, buf)) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_info && tmp_int < 0) + json_info->negative_integers.insert(type.get()); + return type; + } /// In case of Int64 overflow we can try to infer UInt64. UInt64 tmp_uint; @@ -934,7 +938,12 @@ namespace Int64 tmp_int; if (tryReadIntText(tmp_int, peekable_buf)) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_info && tmp_int < 0) + json_info->negative_integers.insert(type.get()); + return type; + } peekable_buf.rollbackToCheckpoint(/* drop= */ true); /// In case of Int64 overflow we can try to infer UInt64. @@ -952,7 +961,7 @@ namespace } template - DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings) + DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_inference_info = nullptr) { ReadBufferFromString buf(field); @@ -960,7 +969,12 @@ namespace { Int64 tmp_int; if (tryReadIntText(tmp_int, buf) && buf.eof()) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_inference_info && tmp_int < 0) + json_inference_info->negative_integers.insert(type.get()); + return type; + } /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. buf.position() = buf.buffer().begin(); @@ -1011,7 +1025,7 @@ namespace { if (settings.json.try_infer_numbers_from_strings) { - if (auto number_type = tryInferNumberFromStringImpl(field, settings)) + if (auto number_type = tryInferNumberFromStringImpl(field, settings, json_info)) { json_info->numbers_parsed_from_json_strings.insert(number_type.get()); return number_type; @@ -1254,10 +1268,23 @@ namespace } /// Number - return tryInferNumber(buf, settings); + return tryInferNumber(buf, settings, json_info); } } +bool checkIfTypesAreEqual(const DataTypes & types) +{ + if (types.empty()) + return true; + + for (size_t i = 1; i < types.size(); ++i) + { + if (!types[0]->equals(*types[i])) + return false; + } + return true; +} + void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings) { DataTypes types = {first, second}; @@ -1275,6 +1302,11 @@ void transformInferredJSONTypesIfNeeded( second = std::move(types[1]); } +void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + transformInferredTypesIfNeededImpl(types, settings, json_info); +} + void transformInferredJSONTypesFromDifferentFilesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings) { JSONInferenceInfo json_info; @@ -1396,6 +1428,12 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting return tryInferNumberFromStringImpl(field, settings); } +DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + return tryInferNumberFromStringImpl(field, settings, json_info); + +} + DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings) { if (settings.try_infer_dates && tryInferDate(field)) diff --git a/src/Formats/SchemaInferenceUtils.h b/src/Formats/SchemaInferenceUtils.h index bcf3d194825..06c14c0797a 100644 --- a/src/Formats/SchemaInferenceUtils.h +++ b/src/Formats/SchemaInferenceUtils.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -18,6 +19,11 @@ struct JSONInferenceInfo /// We store numbers that were parsed from strings. /// It's used in types transformation to change such numbers back to string if needed. std::unordered_set numbers_parsed_from_json_strings; + /// Store integer types that were inferred from negative numbers. + /// It's used to determine common type for Int64 and UInt64 + /// TODO: check it not only in JSON formats. + std::unordered_set negative_integers; + /// Indicates if currently we are inferring type for Map/Object key. bool is_object_key = false; /// When we transform types for the same column from different files @@ -48,6 +54,7 @@ DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const Forma /// Try to parse a number value from a string. By default, it tries to parse Float64, /// but if setting try_infer_integers is enabled, it also tries to parse Int64. DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings); +DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info); /// It takes two types inferred for the same column and tries to transform them to a common type if possible. /// It's also used when we try to infer some not ordinary types from another types. @@ -77,6 +84,7 @@ void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, c /// Example 2: /// We merge DataTypeJSONPaths types to a single DataTypeJSONPaths type with union of all JSON paths. void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info); +void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info); /// Make final transform for types inferred in JSON format. It does 3 types of transformation: /// 1) Checks if type is unnamed Tuple(...), tries to transform nested types to find a common type for them and if all nested types @@ -107,4 +115,6 @@ NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header); /// Check if type contains Nothing, like Array(Tuple(Nullable(Nothing), String)) bool checkIfTypeIsComplete(const DataTypePtr & type); +bool checkIfTypesAreEqual(const DataTypes & types); + } diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index 0f3f8be96a7..ab10d402df4 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -3,14 +3,14 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include +#include namespace DB { @@ -218,10 +218,7 @@ struct UnbinImpl static constexpr auto name = "unbin"; static constexpr size_t word_size = 8; - static void decode(const char * pos, const char * end, char *& out) - { - binStringDecode(pos, end, out); - } + static void decode(const char * pos, const char * end, char *& out) { binStringDecode(pos, end, out, word_size); } }; /// Encode number or string to string with binary or hexadecimal representation @@ -651,7 +648,15 @@ public: size_t size = in_offsets.size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); + + size_t max_out_len = 0; + for (size_t i = 0; i < in_offsets.size(); ++i) + { + const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1]) + - /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1; + max_out_len += (len + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1; + } + out_vec.resize(max_out_len); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -661,6 +666,7 @@ public: { size_t new_offset = in_offsets[i]; + /// `new_offset - 1` because in ColumnString each string is stored with trailing zero byte Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); out_offsets[i] = pos - begin; @@ -668,6 +674,9 @@ public: prev_offset = new_offset; } + chassert( + static_cast(pos - begin) <= out_vec.size(), + fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size())); out_vec.resize(pos - begin); return col_res; @@ -680,11 +689,11 @@ public: ColumnString::Offsets & out_offsets = col_res->getOffsets(); const ColumnString::Chars & in_vec = col_fix_string->getChars(); - size_t n = col_fix_string->getN(); + const size_t n = col_fix_string->getN(); size_t size = col_fix_string->size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); + out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -694,6 +703,7 @@ public: { size_t new_offset = prev_offset + n; + /// here we don't subtract 1 from `new_offset` because in ColumnFixedString strings are stored without trailing zero byte Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset]), pos); out_offsets[i] = pos - begin; @@ -701,6 +711,9 @@ public: prev_offset = new_offset; } + chassert( + static_cast(pos - begin) <= out_vec.size(), + fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size())); out_vec.resize(pos - begin); return col_res; diff --git a/src/Functions/FunctionsBitToArray.cpp b/src/Functions/FunctionsBitToArray.cpp index adabda1a7f8..beaaccad6db 100644 --- a/src/Functions/FunctionsBitToArray.cpp +++ b/src/Functions/FunctionsBitToArray.cpp @@ -284,7 +284,12 @@ public: { while (x) { - result_array_values_data.push_back(std::countr_zero(x)); + /// С++20 char8_t is not an unsigned integral type anymore https://godbolt.org/z/Mqcb7qn58 + /// and thus you cannot use std::countr_zero on it. + if constexpr (std::is_same_v) + result_array_values_data.push_back(std::countr_zero(static_cast(x))); + else + result_array_values_data.push_back(std::countr_zero(x)); x &= (x - 1); } } @@ -336,4 +341,3 @@ REGISTER_FUNCTION(BitToArray) } } - diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index fbd987577e9..db1602b1939 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -1,10 +1,1063 @@ -#include +#include +#include + +#include + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include #include +#include +#include +#include +#include + +#include + +#include "config.h" namespace DB { +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ILLEGAL_COLUMN; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +concept HasIndexOperator = requires (T t) +{ + t[0]; +}; + +/// Functions to parse JSONs and extract values from it. +/// The first argument of all these functions gets a JSON, +/// after that there are any number of arguments specifying path to a desired part from the JSON's root. +/// For example, +/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 + +class FunctionJSONHelpers +{ +public: + template typename Impl, class JSONParser> + class Executor + { + public: + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const FormatSettings & format_settings) + { + MutableColumnPtr to{result_type->createColumn()}; + to->reserve(input_rows_count); + + if (arguments.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); + + const auto & first_column = arguments[0]; + if (!isString(first_column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The first argument of function {} should be a string containing JSON, illegal type: " + "{}", String(Name::name), first_column.type->getName()); + + const ColumnPtr & arg_json = first_column.column; + const auto * col_json_const = typeid_cast(arg_json.get()); + const auto * col_json_string + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); + + if (!col_json_string) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); + + const ColumnString::Chars & chars = col_json_string->getChars(); + const ColumnString::Offsets & offsets = col_json_string->getOffsets(); + + size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); + std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); + + /// Preallocate memory in parser if necessary. + JSONParser parser; + if constexpr (has_member_function_reserve::value) + { + size_t max_size = calculateMaxSize(offsets); + if (max_size) + parser.reserve(max_size); + } + + Impl impl; + + /// prepare() does Impl-specific preparation before handling each row. + if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) + impl.prepare(Name::name, arguments, result_type); + + using Element = typename JSONParser::Element; + + Element document; + bool document_ok = false; + if (col_json_const) + { + std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; + document_ok = parser.parse(json, document); + } + + String error; + for (const auto i : collections::range(0, input_rows_count)) + { + if (!col_json_const) + { + std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; + document_ok = parser.parse(json, document); + } + + bool added_to_column = false; + if (document_ok) + { + /// Perform moves. + Element element; + std::string_view last_key; + bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); + + if (moves_ok) + added_to_column = impl.insertResultToColumn(*to, element, last_key, format_settings, error); + } + + /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. + if (!added_to_column) + to->insertDefault(); + } + return to; + } + }; + +private: + BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) + BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) + + /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. + /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) + /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. + /// Keys and indices can be nonconst, in this case they are calculated for each row. + enum class MoveType : uint8_t + { + Key, + Index, + ConstKey, + ConstIndex, + }; + + struct Move + { + explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} + Move(MoveType type_, const String & key_) : type(type_), key(key_) {} + MoveType type; + size_t index = 0; + String key; + }; + + static std::vector prepareMoves( + const char * function_name, + const ColumnsWithTypeAndName & columns, + size_t first_index_argument, + size_t num_index_arguments) + { + std::vector moves; + moves.reserve(num_index_arguments); + for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) + { + const auto & column = columns[i]; + if (!isString(column.type) && !isNativeInteger(column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The argument {} of function {} should be a string specifying key " + "or an integer specifying index, illegal type: {}", + std::to_string(i + 1), String(function_name), column.type->getName()); + + if (column.column && isColumnConst(*column.column)) + { + const auto & column_const = assert_cast(*column.column); + if (isString(column.type)) + moves.emplace_back(MoveType::ConstKey, column_const.getValue()); + else + moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); + } + else + { + if (isString(column.type)) + moves.emplace_back(MoveType::Key, ""); + else + moves.emplace_back(MoveType::Index, 0); + } + } + return moves; + } + + + /// Performs moves of types MoveType::Index and MoveType::ConstIndex. + template + static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, + const typename JSONParser::Element & document, const std::vector & moves, + typename JSONParser::Element & element, std::string_view & last_key) + { + typename JSONParser::Element res_element = document; + std::string_view key; + + for (size_t j = 0; j != moves.size(); ++j) + { + switch (moves[j].type) + { + case MoveType::ConstIndex: + { + if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) + return false; + break; + } + case MoveType::ConstKey: + { + key = moves[j].key; + if (!moveToElementByKey(res_element, key)) + return false; + break; + } + case MoveType::Index: + { + Int64 index = (*arguments[j + 1].column)[row].get(); + if (!moveToElementByIndex(res_element, static_cast(index), key)) + return false; + break; + } + case MoveType::Key: + { + key = arguments[j + 1].column->getDataAt(row).toView(); + if (!moveToElementByKey(res_element, key)) + return false; + break; + } + } + } + + element = res_element; + last_key = key; + return true; + } + + template + static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) + { + if (element.isArray()) + { + auto array = element.getArray(); + if (index >= 0) + --index; + else + index += array.size(); + + if (static_cast(index) >= array.size()) + return false; + element = array[index]; + out_key = {}; + return true; + } + + if constexpr (HasIndexOperator) + { + if (element.isObject()) + { + auto object = element.getObject(); + if (index >= 0) + --index; + else + index += object.size(); + + if (static_cast(index) >= object.size()) + return false; + std::tie(out_key, element) = object[index]; + return true; + } + } + + return {}; + } + + /// Performs moves of types MoveType::Key and MoveType::ConstKey. + template + static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) + { + if (!element.isObject()) + return false; + auto object = element.getObject(); + return object.find(key, element); + } + + static size_t calculateMaxSize(const ColumnString::Offsets & offsets) + { + size_t max_size = 0; + for (const auto i : collections::range(0, offsets.size())) + { + size_t size = offsets[i] - offsets[i - 1]; + max_size = std::max(max_size, size); + } + if (max_size) + --max_size; + return max_size; + } + +}; + +template +class JSONExtractImpl; + +template +class JSONExtractKeysAndValuesImpl; + +/** +* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. +* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` +* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of +* input arguments. +* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - +* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality +* if needed. +*/ +template typename Impl> +constexpr bool functionForcesTheReturnType() +{ + return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; +} + +template typename Impl> +class ExecutableFunctionJSON : public IExecutableFunction +{ + +public: + explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_, const FormatSettings & format_settings_) + : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_), format_settings(format_settings_) + { + /// Don't escape forward slashes during converting JSON elements to raw string. + format_settings.json.escape_forward_slashes = false; + /// Don't insert default values on null during traversing the JSON element. + /// We allow to insert null only to Nullable columns in JSONExtract functions. + format_settings.null_as_default = false; + } + + String getName() const override { return Name::name; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + if (null_presence.has_null_constant) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + if constexpr (functionForcesTheReturnType()) + { + ColumnsWithTypeAndName columns_without_low_cardinality = arguments; + + for (auto & column : columns_without_low_cardinality) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } + + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + else + { + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + } + +private: + + ColumnPtr + chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { +#if USE_SIMDJSON + if (allow_simdjson) + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#endif + +#if USE_RAPIDJSON + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#else + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#endif + } + + NullPresence null_presence; + bool allow_simdjson; + DataTypePtr json_return_type; + FormatSettings format_settings; +}; + + +template typename Impl> +class FunctionBaseFunctionJSON : public IFunctionBase +{ +public: + explicit FunctionBaseFunctionJSON( + const NullPresence & null_presence_, + bool allow_simdjson_, + DataTypes argument_types_, + DataTypePtr return_type_, + DataTypePtr json_return_type_, + const FormatSettings & format_settings_) + : null_presence(null_presence_) + , allow_simdjson(allow_simdjson_) + , argument_types(std::move(argument_types_)) + , return_type(std::move(return_type_)) + , json_return_type(std::move(json_return_type_)) + , format_settings(format_settings_) + { + } + + String getName() const override { return Name::name; } + + const DataTypes & getArgumentTypes() const override + { + return argument_types; + } + + const DataTypePtr & getResultType() const override + { + return return_type; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique>(null_presence, allow_simdjson, json_return_type, format_settings); + } + +private: + NullPresence null_presence; + bool allow_simdjson; + DataTypes argument_types; + DataTypePtr return_type; + DataTypePtr json_return_type; + FormatSettings format_settings; +}; + +/// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. +/// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. +template typename Impl> +class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext +{ +public: + static constexpr auto name = Name::name; + + String getName() const override { return name; } + + static FunctionOverloadResolverPtr create(ContextPtr context_) + { + return std::make_unique(context_); + } + + explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } + + FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override + { + bool has_nothing_argument = false; + for (const auto & arg : arguments) + has_nothing_argument |= isNothing(arg.type); + + DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); + NullPresence null_presence = getNullPresense(arguments); + DataTypePtr return_type; + if (has_nothing_argument) + return_type = std::make_shared(); + else if (null_presence.has_null_constant) + return_type = makeNullable(std::make_shared()); + else if (null_presence.has_nullable) + return_type = makeNullable(json_return_type); + else + return_type = json_return_type; + + DataTypes argument_types; + argument_types.reserve(arguments.size()); + for (const auto & argument : arguments) + argument_types.emplace_back(argument.type); + return std::make_unique>( + null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type, getFormatSettings(getContext())); + } +}; + +struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; +struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; +struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; +struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; +struct NameJSONType { static constexpr auto name{"JSONType"}; }; +struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; +struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; +struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; +struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; +struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; +struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; +struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; +struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; +struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; +struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; +struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; + + +template +class JSONHasImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view, const FormatSettings &, String &) + { + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; + } +}; + + +template +class IsValidJSONImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() != 1) + { + /// IsValidJSON() shouldn't get parameters other than JSON. + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", + String(function_name)); + } + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view, const FormatSettings &, String &) + { + /// This function is called only if JSON is valid. + /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; + } +}; + + +template +class JSONLengthImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + size_t size; + if (element.isArray()) + size = element.getArray().size(); + else if (element.isObject()) + size = element.getObject().size(); + else + return false; + + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(size); + return true; + } +}; + + +template +class JSONKeyImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key, const FormatSettings &, String &) + { + if (last_key.empty()) + return false; + ColumnString & col_str = assert_cast(dest); + col_str.insertData(last_key.data(), last_key.size()); + return true; + } +}; + + +template +class JSONTypeImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + static const std::vector> values = { + {"Array", '['}, + {"Object", '{'}, + {"String", '"'}, + {"Int64", 'i'}, + {"UInt64", 'u'}, + {"Double", 'd'}, + {"Bool", 'b'}, + {"Null", 0}, /// the default value for the column. + }; + return std::make_shared>(values); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + UInt8 type; + switch (element.type()) + { + case ElementType::INT64: + type = 'i'; + break; + case ElementType::UINT64: + type = 'u'; + break; + case ElementType::DOUBLE: + type = 'd'; + break; + case ElementType::STRING: + type = '"'; + break; + case ElementType::ARRAY: + type = '['; + break; + case ElementType::OBJECT: + type = '{'; + break; + case ElementType::BOOL: + type = 'b'; + break; + case ElementType::NULL_VALUE: + type = 0; + break; + } + + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(type); + return true; + } +}; + + +template +class JSONExtractNumericImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared>(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static const std::unique_ptr> & getInsertNode() + { + static const std::unique_ptr> node = buildJSONExtractTree(std::make_shared>()); + } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String & error) + { + NumberType value; + + tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error); + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(value); + return true; + } +}; + + +template +using JSONExtractInt64Impl = JSONExtractNumericImpl; +template +using JSONExtractUInt64Impl = JSONExtractNumericImpl; +template +using JSONExtractFloat64Impl = JSONExtractNumericImpl; + + +template +class JSONExtractBoolImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + bool value; + switch (element.type()) + { + case ElementType::BOOL: + value = element.getBool(); + break; + case ElementType::INT64: + value = element.getInt64() != 0; + break; + case ElementType::UINT64: + value = element.getUInt64() != 0; + break; + default: + return false; + } + + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(static_cast(value)); + return true; + } +}; + +template +class JSONExtractRawImpl; + +template +class JSONExtractStringImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (element.isNull()) + return false; + + if (!element.isString()) + return JSONExtractRawImpl::insertResultToColumn(dest, element, {}, format_settings, error); + + auto str = element.getString(); + ColumnString & col_str = assert_cast(dest); + col_str.insertData(str.data(), str.size()); + return true; + } +}; + +template +class JSONExtractImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); + + const auto & col = arguments.back(); + const auto * col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The last argument of function {} should " + "be a constant string specifying the return data type, illegal value: {}", + String(function_name), col.name); + + return DataTypeFactory::instance().get(col_type_const->getValue()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } + + void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) + { + extract_tree = buildJSONExtractTree(result_type, function_name); + insert_settings.insert_default_on_invalid_elements_in_complex_types = true; + } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + return extract_tree->insertResultToColumn(dest, element, insert_settings, format_settings, error); + } + +protected: + std::unique_ptr> extract_tree; + JSONExtractInsertSettings insert_settings; +}; + + +template +class JSONExtractKeysAndValuesImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); + + const auto & col = arguments.back(); + const auto * col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The last argument of function {} should " + "be a constant string specifying the values' data type, illegal value: {}", + String(function_name), col.name); + + DataTypePtr key_type = std::make_unique(); + DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); + DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); + return std::make_unique(tuple_type); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } + + void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) + { + const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); + const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; + extract_tree = buildJSONExtractTree(value_type, function_name); + insert_settings.insert_default_on_invalid_elements_in_complex_types = true; + } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + auto & col_arr = assert_cast(dest); + auto & col_tuple = assert_cast(col_arr.getData()); + size_t old_size = col_tuple.size(); + auto & col_key = assert_cast(col_tuple.getColumn(0)); + auto & col_value = col_tuple.getColumn(1); + + for (const auto & [key, value] : object) + { + if (extract_tree->insertResultToColumn(col_value, value, insert_settings, format_settings, error)) + col_key.insertData(key.data(), key.size()); + } + + if (col_tuple.size() == old_size) + return false; + + col_arr.getOffsets().push_back(col_tuple.size()); + return true; + } + +private: + std::unique_ptr> extract_tree; + JSONExtractInsertSettings insert_settings; +}; + + +template +class JSONExtractRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String &) + { + ColumnString & col_str = assert_cast(dest); + auto & chars = col_str.getChars(); + WriteBufferFromVector buf(chars, AppendModeTag()); + jsonElementToString(element, buf, format_settings); + buf.finalize(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + return true; + } +}; + + +template +class JSONExtractArrayRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isArray()) + return false; + + auto array = element.getArray(); + ColumnArray & col_res = assert_cast(dest); + + for (auto value : array) + JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}, format_settings, error); + + col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); + return true; + } +}; + + +template +class JSONExtractKeysAndValuesRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + DataTypePtr string_type = std::make_unique(); + DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); + return std::make_unique(tuple_type); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + auto & col_arr = assert_cast(dest); + auto & col_tuple = assert_cast(col_arr.getData()); + auto & col_key = assert_cast(col_tuple.getColumn(0)); + auto & col_value = assert_cast(col_tuple.getColumn(1)); + + for (const auto & [key, value] : object) + { + col_key.insertData(key.data(), key.size()); + JSONExtractRawImpl::insertResultToColumn(col_value, value, {}, format_settings, error); + } + + col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); + return true; + } +}; + +template +class JSONExtractKeysImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_unique(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + ColumnArray & col_res = assert_cast(dest); + auto & col_key = assert_cast(col_res.getData()); + + for (const auto & [key, value] : object) + { + col_key.insertData(key.data(), key.size()); + } + + col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); + return true; + } +}; + REGISTER_FUNCTION(JSON) { factory.registerFunction>(); diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h deleted file mode 100644 index 8a2ad457d34..00000000000 --- a/src/Functions/FunctionsJSON.h +++ /dev/null @@ -1,1781 +0,0 @@ -#pragma once - -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - - -#include "config.h" - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ILLEGAL_COLUMN; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -template -concept HasIndexOperator = requires (T t) -{ - t[0]; -}; - -/// Functions to parse JSONs and extract values from it. -/// The first argument of all these functions gets a JSON, -/// after that there are any number of arguments specifying path to a desired part from the JSON's root. -/// For example, -/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 - -class FunctionJSONHelpers -{ -public: - template typename Impl, class JSONParser> - class Executor - { - public: - static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) - { - MutableColumnPtr to{result_type->createColumn()}; - to->reserve(input_rows_count); - - if (arguments.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); - - const auto & first_column = arguments[0]; - if (!isString(first_column.type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The first argument of function {} should be a string containing JSON, illegal type: " - "{}", String(Name::name), first_column.type->getName()); - - const ColumnPtr & arg_json = first_column.column; - const auto * col_json_const = typeid_cast(arg_json.get()); - const auto * col_json_string - = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); - - if (!col_json_string) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); - - const ColumnString::Chars & chars = col_json_string->getChars(); - const ColumnString::Offsets & offsets = col_json_string->getOffsets(); - - size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); - std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); - - /// Preallocate memory in parser if necessary. - JSONParser parser; - if constexpr (has_member_function_reserve::value) - { - size_t max_size = calculateMaxSize(offsets); - if (max_size) - parser.reserve(max_size); - } - - Impl impl; - - /// prepare() does Impl-specific preparation before handling each row. - if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) - impl.prepare(Name::name, arguments, result_type); - - using Element = typename JSONParser::Element; - - Element document; - bool document_ok = false; - if (col_json_const) - { - std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; - document_ok = parser.parse(json, document); - } - - for (const auto i : collections::range(0, input_rows_count)) - { - if (!col_json_const) - { - std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; - document_ok = parser.parse(json, document); - } - - bool added_to_column = false; - if (document_ok) - { - /// Perform moves. - Element element; - std::string_view last_key; - bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); - - if (moves_ok) - added_to_column = impl.insertResultToColumn(*to, element, last_key); - } - - /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. - if (!added_to_column) - to->insertDefault(); - } - return to; - } - }; - -private: - BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) - BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) - - /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. - /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) - /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. - /// Keys and indices can be nonconst, in this case they are calculated for each row. - enum class MoveType : uint8_t - { - Key, - Index, - ConstKey, - ConstIndex, - }; - - struct Move - { - explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} - Move(MoveType type_, const String & key_) : type(type_), key(key_) {} - MoveType type; - size_t index = 0; - String key; - }; - - static std::vector prepareMoves( - const char * function_name, - const ColumnsWithTypeAndName & columns, - size_t first_index_argument, - size_t num_index_arguments) - { - std::vector moves; - moves.reserve(num_index_arguments); - for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) - { - const auto & column = columns[i]; - if (!isString(column.type) && !isNativeInteger(column.type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The argument {} of function {} should be a string specifying key " - "or an integer specifying index, illegal type: {}", - std::to_string(i + 1), String(function_name), column.type->getName()); - - if (column.column && isColumnConst(*column.column)) - { - const auto & column_const = assert_cast(*column.column); - if (isString(column.type)) - moves.emplace_back(MoveType::ConstKey, column_const.getValue()); - else - moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); - } - else - { - if (isString(column.type)) - moves.emplace_back(MoveType::Key, ""); - else - moves.emplace_back(MoveType::Index, 0); - } - } - return moves; - } - - - /// Performs moves of types MoveType::Index and MoveType::ConstIndex. - template - static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, - const typename JSONParser::Element & document, const std::vector & moves, - typename JSONParser::Element & element, std::string_view & last_key) - { - typename JSONParser::Element res_element = document; - std::string_view key; - - for (size_t j = 0; j != moves.size(); ++j) - { - switch (moves[j].type) - { - case MoveType::ConstIndex: - { - if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) - return false; - break; - } - case MoveType::ConstKey: - { - key = moves[j].key; - if (!moveToElementByKey(res_element, key)) - return false; - break; - } - case MoveType::Index: - { - Int64 index = (*arguments[j + 1].column)[row].get(); - if (!moveToElementByIndex(res_element, static_cast(index), key)) - return false; - break; - } - case MoveType::Key: - { - key = arguments[j + 1].column->getDataAt(row).toView(); - if (!moveToElementByKey(res_element, key)) - return false; - break; - } - } - } - - element = res_element; - last_key = key; - return true; - } - - template - static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) - { - if (element.isArray()) - { - auto array = element.getArray(); - if (index >= 0) - --index; - else - index += array.size(); - - if (static_cast(index) >= array.size()) - return false; - element = array[index]; - out_key = {}; - return true; - } - - if constexpr (HasIndexOperator) - { - if (element.isObject()) - { - auto object = element.getObject(); - if (index >= 0) - --index; - else - index += object.size(); - - if (static_cast(index) >= object.size()) - return false; - std::tie(out_key, element) = object[index]; - return true; - } - } - - return {}; - } - - /// Performs moves of types MoveType::Key and MoveType::ConstKey. - template - static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) - { - if (!element.isObject()) - return false; - auto object = element.getObject(); - return object.find(key, element); - } - - static size_t calculateMaxSize(const ColumnString::Offsets & offsets) - { - size_t max_size = 0; - for (const auto i : collections::range(0, offsets.size())) - { - size_t size = offsets[i] - offsets[i - 1]; - max_size = std::max(max_size, size); - } - if (max_size) - --max_size; - return max_size; - } - -}; - -template -class JSONExtractImpl; - -template -class JSONExtractKeysAndValuesImpl; - -/** -* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. -* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` -* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of -* input arguments. -* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - -* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality -* if needed. -*/ -template typename Impl> -constexpr bool functionForcesTheReturnType() -{ - return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; -} - -template typename Impl> -class ExecutableFunctionJSON : public IExecutableFunction -{ - -public: - explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_) - : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_) - { - } - - String getName() const override { return Name::name; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForLowCardinalityColumns() const override - { - return !functionForcesTheReturnType(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - if (null_presence.has_null_constant) - return result_type->createColumnConstWithDefaultValue(input_rows_count); - - if constexpr (functionForcesTheReturnType()) - { - ColumnsWithTypeAndName columns_without_low_cardinality = arguments; - - for (auto & column : columns_without_low_cardinality) - { - column.column = recursiveRemoveLowCardinality(column.column); - column.type = recursiveRemoveLowCardinality(column.type); - } - - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - - if (null_presence.has_nullable) - temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); - - if (result_type->lowCardinality()) - temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); - - return temporary_result; - } - else - { - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - - if (null_presence.has_nullable) - temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); - - if (result_type->lowCardinality()) - temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); - - return temporary_result; - } - } - -private: - - ColumnPtr - chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { -#if USE_SIMDJSON - if (allow_simdjson) - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#endif - -#if USE_RAPIDJSON - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#else - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#endif - } - - NullPresence null_presence; - bool allow_simdjson; - DataTypePtr json_return_type; -}; - - -template typename Impl> -class FunctionBaseFunctionJSON : public IFunctionBase -{ -public: - explicit FunctionBaseFunctionJSON( - const NullPresence & null_presence_, - bool allow_simdjson_, - DataTypes argument_types_, - DataTypePtr return_type_, - DataTypePtr json_return_type_) - : null_presence(null_presence_) - , allow_simdjson(allow_simdjson_) - , argument_types(std::move(argument_types_)) - , return_type(std::move(return_type_)) - , json_return_type(std::move(json_return_type_)) - { - } - - String getName() const override { return Name::name; } - - const DataTypes & getArgumentTypes() const override - { - return argument_types; - } - - const DataTypePtr & getResultType() const override - { - return return_type; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override - { - return std::make_unique>(null_presence, allow_simdjson, json_return_type); - } - -private: - NullPresence null_presence; - bool allow_simdjson; - DataTypes argument_types; - DataTypePtr return_type; - DataTypePtr json_return_type; -}; - -/// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. -/// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. -template typename Impl> -class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext -{ -public: - static constexpr auto name = Name::name; - - String getName() const override { return name; } - - static FunctionOverloadResolverPtr create(ContextPtr context_) - { - return std::make_unique(context_); - } - - explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForLowCardinalityColumns() const override - { - return !functionForcesTheReturnType(); - } - - FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override - { - bool has_nothing_argument = false; - for (const auto & arg : arguments) - has_nothing_argument |= isNothing(arg.type); - - DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); - NullPresence null_presence = getNullPresense(arguments); - DataTypePtr return_type; - if (has_nothing_argument) - return_type = std::make_shared(); - else if (null_presence.has_null_constant) - return_type = makeNullable(std::make_shared()); - else if (null_presence.has_nullable) - return_type = makeNullable(json_return_type); - else - return_type = json_return_type; - - /// Top-level LowCardinality columns are processed outside JSON parser. - json_return_type = removeLowCardinality(json_return_type); - - DataTypes argument_types; - argument_types.reserve(arguments.size()); - for (const auto & argument : arguments) - argument_types.emplace_back(argument.type); - return std::make_unique>( - null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type); - } -}; - -struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; -struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; -struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; -struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; -struct NameJSONType { static constexpr auto name{"JSONType"}; }; -struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; -struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; -struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; -struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; -struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; -struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; -struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; -struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; -struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; -struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; -struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; - - -template -class JSONHasImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) - { - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(1); - return true; - } -}; - - -template -class IsValidJSONImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() != 1) - { - /// IsValidJSON() shouldn't get parameters other than JSON. - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", - String(function_name)); - } - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) - { - /// This function is called only if JSON is valid. - /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(1); - return true; - } -}; - - -template -class JSONLengthImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - size_t size; - if (element.isArray()) - size = element.getArray().size(); - else if (element.isObject()) - size = element.getObject().size(); - else - return false; - - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(size); - return true; - } -}; - - -template -class JSONKeyImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key) - { - if (last_key.empty()) - return false; - ColumnString & col_str = assert_cast(dest); - col_str.insertData(last_key.data(), last_key.size()); - return true; - } -}; - - -template -class JSONTypeImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - static const std::vector> values = { - {"Array", '['}, - {"Object", '{'}, - {"String", '"'}, - {"Int64", 'i'}, - {"UInt64", 'u'}, - {"Double", 'd'}, - {"Bool", 'b'}, - {"Null", 0}, /// the default value for the column. - }; - return std::make_shared>(values); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - UInt8 type; - switch (element.type()) - { - case ElementType::INT64: - type = 'i'; - break; - case ElementType::UINT64: - type = 'u'; - break; - case ElementType::DOUBLE: - type = 'd'; - break; - case ElementType::STRING: - type = '"'; - break; - case ElementType::ARRAY: - type = '['; - break; - case ElementType::OBJECT: - type = '{'; - break; - case ElementType::BOOL: - type = 'b'; - break; - case ElementType::NULL_VALUE: - type = 0; - break; - } - - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(type); - return true; - } -}; - - -template -class JSONExtractNumericImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared>(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - NumberType value; - - switch (element.type()) - { - case ElementType::DOUBLE: - if constexpr (std::is_floating_point_v) - { - /// We permit inaccurate conversion of double to float. - /// Example: double 0.1 from JSON is not representable in float. - /// But it will be more convenient for user to perform conversion. - value = static_cast(element.getDouble()); - } - else if (!accurate::convertNumeric(element.getDouble(), value)) - return false; - break; - case ElementType::UINT64: - if (!accurate::convertNumeric(element.getUInt64(), value)) - return false; - break; - case ElementType::INT64: - if (!accurate::convertNumeric(element.getInt64(), value)) - return false; - break; - case ElementType::BOOL: - if constexpr (is_integer && convert_bool_to_integer) - { - value = static_cast(element.getBool()); - break; - } - return false; - case ElementType::STRING: - { - auto rb = ReadBufferFromMemory{element.getString()}; - if constexpr (std::is_floating_point_v) - { - if (!tryReadFloatText(value, rb) || !rb.eof()) - return false; - } - else - { - if (tryReadIntText(value, rb) && rb.eof()) - break; - - /// Try to parse float and convert it to integer. - Float64 tmp_float; - rb.position() = rb.buffer().begin(); - if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) - return false; - - if (!accurate::convertNumeric(tmp_float, value)) - return false; - } - break; - } - default: - return false; - } - - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(reinterpret_cast(&value), sizeof(value)); - } - else - { - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(value); - } - return true; - } -}; - - -template -using JSONExtractInt64Impl = JSONExtractNumericImpl; -template -using JSONExtractUInt64Impl = JSONExtractNumericImpl; -template -using JSONExtractFloat64Impl = JSONExtractNumericImpl; - - -template -class JSONExtractBoolImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - bool value; - switch (element.type()) - { - case ElementType::BOOL: - value = element.getBool(); - break; - case ElementType::INT64: - value = element.getInt64() != 0; - break; - case ElementType::UINT64: - value = element.getUInt64() != 0; - break; - default: - return false; - } - - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(static_cast(value)); - return true; - } -}; - -template -class JSONExtractRawImpl; - -template -class JSONExtractStringImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (element.isNull()) - return false; - - if (!element.isString()) - return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); - - auto str = element.getString(); - - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(str.data(), str.size()); - } - else - { - ColumnString & col_str = assert_cast(dest); - col_str.insertData(str.data(), str.size()); - } - return true; - } -}; - -/// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables. -template -struct JSONExtractTree -{ - using Element = typename JSONParser::Element; - - class Node - { - public: - Node() = default; - virtual ~Node() = default; - virtual bool insertResultToColumn(IColumn &, const Element &) = 0; - }; - - template - class NumericNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - return JSONExtractNumericImpl::insertResultToColumn(dest, element, {}); - } - }; - - class LowCardinalityFixedStringNode : public Node - { - public: - explicit LowCardinalityFixedStringNode(const size_t fixed_length_) : fixed_length(fixed_length_) { } - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - // If element is an object we delegate the insertion to JSONExtractRawImpl - if (element.isObject()) - return JSONExtractRawImpl::insertResultToLowCardinalityFixedStringColumn(dest, element, fixed_length); - else if (!element.isString()) - return false; - - auto str = element.getString(); - if (str.size() > fixed_length) - return false; - - // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. - // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) - // the data is padded here and written directly to the Low Cardinality Column - if (str.size() == fixed_length) - { - assert_cast(dest).insertData(str.data(), str.size()); - } - else - { - String padded_str(str); - padded_str.resize(fixed_length, '\0'); - - assert_cast(dest).insertData(padded_str.data(), padded_str.size()); - } - return true; - } - - private: - const size_t fixed_length; - }; - - class UUIDNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isString()) - return false; - - auto uuid = parseFromString(element.getString()); - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(reinterpret_cast(&uuid), sizeof(uuid)); - } - else - { - assert_cast(dest).insert(uuid); - } - return true; - } - }; - - template - class DecimalNode : public Node - { - public: - explicit DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {} - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - const auto * type = assert_cast *>(data_type.get()); - - DecimalType value{}; - - switch (element.type()) - { - case ElementType::DOUBLE: - value = convertToDecimal, DataTypeDecimal>( - element.getDouble(), type->getScale()); - break; - case ElementType::UINT64: - value = convertToDecimal, DataTypeDecimal>( - element.getUInt64(), type->getScale()); - break; - case ElementType::INT64: - value = convertToDecimal, DataTypeDecimal>( - element.getInt64(), type->getScale()); - break; - case ElementType::STRING: { - auto rb = ReadBufferFromMemory{element.getString()}; - if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, type->getScale())) - return false; - break; - } - default: - return false; - } - - assert_cast &>(dest).insertValue(value); - return true; - } - - private: - DataTypePtr data_type; - }; - - class StringNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); - } - }; - - class FixedStringNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (element.isNull()) - return false; - - if (!element.isString()) - return JSONExtractRawImpl::insertResultToFixedStringColumn(dest, element, {}); - - auto str = element.getString(); - auto & col_str = assert_cast(dest); - if (str.size() > col_str.getN()) - return false; - col_str.insertData(str.data(), str.size()); - - return true; - } - }; - - template - class EnumNode : public Node - { - public: - explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) - { - for (const auto & name_value_pair : name_value_pairs) - { - name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); - only_values.emplace(name_value_pair.second); - } - } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - auto & col_vec = assert_cast &>(dest); - - if (element.isInt64()) - { - Type value; - if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) - return false; - col_vec.insertValue(value); - return true; - } - - if (element.isUInt64()) - { - Type value; - if (!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) - return false; - col_vec.insertValue(value); - return true; - } - - if (element.isString()) - { - auto value = name_to_value_map.find(element.getString()); - if (value == name_to_value_map.end()) - return false; - col_vec.insertValue(value->second); - return true; - } - - return false; - } - - private: - std::vector> name_value_pairs; - std::unordered_map name_to_value_map; - std::unordered_set only_values; - }; - - class NullableNode : public Node - { - public: - explicit NullableNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (dest.getDataType() == TypeIndex::LowCardinality) - { - /// We do not need to handle nullability in that case - /// because nested node handles LowCardinality columns and will call proper overload of `insertData` - return nested->insertResultToColumn(dest, element); - } - - ColumnNullable & col_null = assert_cast(dest); - if (!nested->insertResultToColumn(col_null.getNestedColumn(), element)) - return false; - col_null.getNullMapColumn().insertValue(0); - return true; - } - - private: - std::unique_ptr nested; - }; - - class ArrayNode : public Node - { - public: - explicit ArrayNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isArray()) - return false; - - auto array = element.getArray(); - - ColumnArray & col_arr = assert_cast(dest); - auto & data = col_arr.getData(); - size_t old_size = data.size(); - bool were_valid_elements = false; - - for (auto value : array) - { - if (nested->insertResultToColumn(data, value)) - were_valid_elements = true; - else - data.insertDefault(); - } - - if (!were_valid_elements) - { - data.popBack(data.size() - old_size); - return false; - } - - col_arr.getOffsets().push_back(data.size()); - return true; - } - - private: - std::unique_ptr nested; - }; - - class TupleNode : public Node - { - public: - TupleNode(std::vector> nested_, const std::vector & explicit_names_) : nested(std::move(nested_)), explicit_names(explicit_names_) - { - for (size_t i = 0; i != explicit_names.size(); ++i) - name_to_index_map.emplace(explicit_names[i], i); - } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - ColumnTuple & tuple = assert_cast(dest); - size_t old_size = dest.size(); - bool were_valid_elements = false; - - auto set_size = [&](size_t size) - { - for (size_t i = 0; i != tuple.tupleSize(); ++i) - { - auto & col = tuple.getColumn(i); - if (col.size() != size) - { - if (col.size() > size) - col.popBack(col.size() - size); - else - while (col.size() < size) - col.insertDefault(); - } - } - }; - - if (element.isArray()) - { - auto array = element.getArray(); - auto it = array.begin(); - - for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) - { - if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++)) - were_valid_elements = true; - else - tuple.getColumn(index).insertDefault(); - } - - set_size(old_size + static_cast(were_valid_elements)); - return were_valid_elements; - } - - if (element.isObject()) - { - auto object = element.getObject(); - if (name_to_index_map.empty()) - { - auto it = object.begin(); - for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) - { - if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second)) - were_valid_elements = true; - else - tuple.getColumn(index).insertDefault(); - } - } - else - { - for (const auto & [key, value] : object) - { - auto index = name_to_index_map.find(key); - if (index != name_to_index_map.end()) - { - if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value)) - were_valid_elements = true; - } - } - } - - set_size(old_size + static_cast(were_valid_elements)); - return were_valid_elements; - } - - return false; - } - - private: - std::vector> nested; - std::vector explicit_names; - std::unordered_map name_to_index_map; - }; - - class MapNode : public Node - { - public: - MapNode(std::unique_ptr key_, std::unique_ptr value_) : key(std::move(key_)), value(std::move(value_)) { } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isObject()) - return false; - - ColumnMap & map_col = assert_cast(dest); - auto & offsets = map_col.getNestedColumn().getOffsets(); - auto & tuple_col = map_col.getNestedData(); - auto & key_col = tuple_col.getColumn(0); - auto & value_col = tuple_col.getColumn(1); - size_t old_size = tuple_col.size(); - - auto object = element.getObject(); - auto it = object.begin(); - for (; it != object.end(); ++it) - { - auto pair = *it; - - /// Insert key - key_col.insertData(pair.first.data(), pair.first.size()); - - /// Insert value - if (!value->insertResultToColumn(value_col, pair.second)) - value_col.insertDefault(); - } - - offsets.push_back(old_size + object.size()); - return true; - } - - private: - std::unique_ptr key; - std::unique_ptr value; - }; - - class VariantNode : public Node - { - public: - VariantNode(std::vector> variant_nodes_, std::vector order_) : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) { } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - auto & column_variant = assert_cast(dest); - for (size_t i : order) - { - auto & variant = column_variant.getVariantByGlobalDiscriminator(i); - if (variant_nodes[i]->insertResultToColumn(variant, element)) - { - column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); - column_variant.getOffsets().push_back(variant.size() - 1); - return true; - } - } - - return false; - } - - private: - std::vector> variant_nodes; - /// Order in which we should try variants nodes. - /// For example, String should be always the last one. - std::vector order; - }; - - static std::unique_ptr build(const char * function_name, const DataTypePtr & type) - { - switch (type->getTypeId()) - { - case TypeIndex::UInt8: return std::make_unique>(); - case TypeIndex::UInt16: return std::make_unique>(); - case TypeIndex::UInt32: return std::make_unique>(); - case TypeIndex::UInt64: return std::make_unique>(); - case TypeIndex::UInt128: return std::make_unique>(); - case TypeIndex::UInt256: return std::make_unique>(); - case TypeIndex::Int8: return std::make_unique>(); - case TypeIndex::Int16: return std::make_unique>(); - case TypeIndex::Int32: return std::make_unique>(); - case TypeIndex::Int64: return std::make_unique>(); - case TypeIndex::Int128: return std::make_unique>(); - case TypeIndex::Int256: return std::make_unique>(); - case TypeIndex::Float32: return std::make_unique>(); - case TypeIndex::Float64: return std::make_unique>(); - case TypeIndex::String: return std::make_unique(); - case TypeIndex::FixedString: return std::make_unique(); - case TypeIndex::UUID: return std::make_unique(); - case TypeIndex::LowCardinality: - { - // The low cardinality case is treated in two different ways: - // For FixedString type, an especial class is implemented for inserting the data in the destination column, - // as the string length must be passed in order to check and pad the incoming data. - // For the rest of low cardinality types, the insertion is done in their corresponding class, adapting the data - // as needed for the insertData function of the ColumnLowCardinality. - auto dictionary_type = typeid_cast(type.get())->getDictionaryType(); - if ((*dictionary_type).getTypeId() == TypeIndex::FixedString) - { - auto fixed_length = typeid_cast(dictionary_type.get())->getN(); - return std::make_unique(fixed_length); - } - return build(function_name, dictionary_type); - } - case TypeIndex::Decimal256: return std::make_unique>(type); - case TypeIndex::Decimal128: return std::make_unique>(type); - case TypeIndex::Decimal64: return std::make_unique>(type); - case TypeIndex::Decimal32: return std::make_unique>(type); - case TypeIndex::Enum8: - return std::make_unique>(static_cast(*type).getValues()); - case TypeIndex::Enum16: - return std::make_unique>(static_cast(*type).getValues()); - case TypeIndex::Nullable: - { - return std::make_unique(build(function_name, static_cast(*type).getNestedType())); - } - case TypeIndex::Array: - { - return std::make_unique(build(function_name, static_cast(*type).getNestedType())); - } - case TypeIndex::Tuple: - { - const auto & tuple = static_cast(*type); - const auto & tuple_elements = tuple.getElements(); - std::vector> elements; - elements.reserve(tuple_elements.size()); - for (const auto & tuple_element : tuple_elements) - elements.emplace_back(build(function_name, tuple_element)); - return std::make_unique(std::move(elements), tuple.haveExplicitNames() ? tuple.getElementNames() : Strings{}); - } - case TypeIndex::Map: - { - const auto & map_type = static_cast(*type); - const auto & key_type = map_type.getKeyType(); - if (!isString(removeLowCardinality(key_type))) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function {} doesn't support the return type schema: {} with key type not String", - String(function_name), - type->getName()); - - const auto & value_type = map_type.getValueType(); - return std::make_unique(build(function_name, key_type), build(function_name, value_type)); - } - case TypeIndex::Variant: - { - const auto & variant_type = static_cast(*type); - const auto & variants = variant_type.getVariants(); - std::vector> variant_nodes; - variant_nodes.reserve(variants.size()); - for (const auto & variant : variants) - variant_nodes.push_back(build(function_name, variant)); - return std::make_unique(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); - } - default: - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function {} doesn't support the return type schema: {}", - String(function_name), type->getName()); - } - } -}; - - -template -class JSONExtractImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); - - const auto & col = arguments.back(); - const auto * col_type_const = typeid_cast(col.column.get()); - if (!col_type_const || !isString(col.type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The last argument of function {} should " - "be a constant string specifying the return data type, illegal value: {}", - String(function_name), col.name); - - return DataTypeFactory::instance().get(col_type_const->getValue()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } - - void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) - { - extract_tree = JSONExtractTree::build(function_name, result_type); - } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - return extract_tree->insertResultToColumn(dest, element); - } - -protected: - std::unique_ptr::Node> extract_tree; -}; - - -template -class JSONExtractKeysAndValuesImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); - - const auto & col = arguments.back(); - const auto * col_type_const = typeid_cast(col.column.get()); - if (!col_type_const || !isString(col.type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The last argument of function {} should " - "be a constant string specifying the values' data type, illegal value: {}", - String(function_name), col.name); - - DataTypePtr key_type = std::make_unique(); - DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); - DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); - return std::make_unique(tuple_type); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } - - void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) - { - const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); - const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; - extract_tree = JSONExtractTree::build(function_name, value_type); - } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - auto & col_arr = assert_cast(dest); - auto & col_tuple = assert_cast(col_arr.getData()); - size_t old_size = col_tuple.size(); - auto & col_key = assert_cast(col_tuple.getColumn(0)); - auto & col_value = col_tuple.getColumn(1); - - for (const auto & [key, value] : object) - { - if (extract_tree->insertResultToColumn(col_value, value)) - col_key.insertData(key.data(), key.size()); - } - - if (col_tuple.size() == old_size) - return false; - - col_arr.getOffsets().push_back(col_tuple.size()); - return true; - } - -private: - std::unique_ptr::Node> extract_tree; -}; - - -template -class JSONExtractRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); - } - else - { - ColumnString & col_str = assert_cast(dest); - auto & chars = col_str.getChars(); - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - chars.push_back(0); - col_str.getOffsets().push_back(chars.size()); - } - return true; - } - - // We use insertResultToFixedStringColumn in case we are inserting raw data in a FixedString column - static bool insertResultToFixedStringColumn(IColumn & dest, const Element & element, std::string_view) - { - ColumnFixedString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - - auto & col_str = assert_cast(dest); - - if (chars.size() > col_str.getN()) - return false; - - chars.resize_fill(col_str.getN()); - col_str.insertData(reinterpret_cast(chars.data()), chars.size()); - - - return true; - } - - // We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column - static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length) - { - if (element.getObject().size() > fixed_length) - return false; - - ColumnFixedString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - - if (chars.size() > fixed_length) - return false; - chars.resize_fill(fixed_length); - assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); - - return true; - } - -private: - static void traverse(const Element & element, WriteBuffer & buf) - { - if (element.isInt64()) - { - writeIntText(element.getInt64(), buf); - return; - } - if (element.isUInt64()) - { - writeIntText(element.getUInt64(), buf); - return; - } - if (element.isDouble()) - { - writeFloatText(element.getDouble(), buf); - return; - } - if (element.isBool()) - { - if (element.getBool()) - writeCString("true", buf); - else - writeCString("false", buf); - return; - } - if (element.isString()) - { - writeJSONString(element.getString(), buf, formatSettings()); - return; - } - if (element.isArray()) - { - writeChar('[', buf); - bool need_comma = false; - for (auto value : element.getArray()) - { - if (std::exchange(need_comma, true)) - writeChar(',', buf); - traverse(value, buf); - } - writeChar(']', buf); - return; - } - if (element.isObject()) - { - writeChar('{', buf); - bool need_comma = false; - for (auto [key, value] : element.getObject()) - { - if (std::exchange(need_comma, true)) - writeChar(',', buf); - writeJSONString(key, buf, formatSettings()); - writeChar(':', buf); - traverse(value, buf); - } - writeChar('}', buf); - return; - } - if (element.isNull()) - { - writeCString("null", buf); - return; - } - } - - static const FormatSettings & formatSettings() - { - static const FormatSettings the_instance = [] - { - FormatSettings settings; - settings.json.escape_forward_slashes = false; - return settings; - }(); - return the_instance; - } -}; - - -template -class JSONExtractArrayRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(std::make_shared()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isArray()) - return false; - - auto array = element.getArray(); - ColumnArray & col_res = assert_cast(dest); - - for (auto value : array) - JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}); - - col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); - return true; - } -}; - - -template -class JSONExtractKeysAndValuesRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - DataTypePtr string_type = std::make_unique(); - DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); - return std::make_unique(tuple_type); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - auto & col_arr = assert_cast(dest); - auto & col_tuple = assert_cast(col_arr.getData()); - auto & col_key = assert_cast(col_tuple.getColumn(0)); - auto & col_value = assert_cast(col_tuple.getColumn(1)); - - for (const auto & [key, value] : object) - { - col_key.insertData(key.data(), key.size()); - JSONExtractRawImpl::insertResultToColumn(col_value, value, {}); - } - - col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); - return true; - } -}; - -template -class JSONExtractKeysImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_unique(std::make_shared()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - ColumnArray & col_res = assert_cast(dest); - auto & col_key = assert_cast(col_res.getData()); - - for (const auto & [key, value] : object) - { - col_key.insertData(key.data(), key.size()); - } - - col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); - return true; - } -}; - -} diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 24a40c45c6e..f5fb08f71d2 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -1,9 +1,12 @@ #pragma once -#include #include +#include #include +#include +#include #include +#include namespace DB { @@ -48,45 +51,75 @@ struct ReplaceRegexpImpl static constexpr int max_captures = 10; - static Instructions createInstructions(std::string_view replacement, int num_captures) + /// The replacement string references must not contain non-existing capturing groups. + static void checkSubstitutions(std::string_view replacement, int num_captures) { - Instructions instructions; - - String literals; for (size_t i = 0; i < replacement.size(); ++i) { if (replacement[i] == '\\' && i + 1 < replacement.size()) { - if (isNumericASCII(replacement[i + 1])) /// Substitution + if (isNumericASCII(replacement[i + 1])) /// substitution + { + int substitution_num = replacement[i + 1] - '0'; + if (substitution_num >= num_captures) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Substitution '\\{}' in replacement argument is invalid, regexp has only {} capturing groups", substitution_num, num_captures - 1); + } + } + } + } + + static Instructions createInstructions(std::string_view replacement, int num_captures) + { + checkSubstitutions(replacement, num_captures); + + Instructions instructions; + + String literals; + literals.reserve(replacement.size()); + + for (size_t i = 0; i < replacement.size(); ++i) + { + if (replacement[i] == '\\' && i + 1 < replacement.size()) + { + if (isNumericASCII(replacement[i + 1])) /// substitution { if (!literals.empty()) { instructions.emplace_back(literals); literals = ""; } - instructions.emplace_back(replacement[i + 1] - '0'); + int substitution_num = replacement[i + 1] - '0'; + instructions.emplace_back(substitution_num); } else - literals += replacement[i + 1]; /// Escaping + literals += replacement[i + 1]; /// escaping ++i; } else - literals += replacement[i]; /// Plain character + literals += replacement[i]; /// plain character } if (!literals.empty()) instructions.emplace_back(literals); - for (const auto & instr : instructions) - if (instr.substitution_num >= num_captures) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Id {} in replacement string is an invalid substitution, regexp has only {} capturing groups", - instr.substitution_num, num_captures - 1); - return instructions; } + static bool canFallbackToStringReplacement(const String & needle, const String & replacement, const re2::RE2 & searcher, int num_captures) + { + if (searcher.NumberOfCapturingGroups()) + return false; + + checkSubstitutions(replacement, num_captures); + + String required_substring; + bool is_trivial; + bool required_substring_is_prefix; + std::vector alternatives; + OptimizedRegularExpression::analyze(needle, required_substring, is_trivial, required_substring_is_prefix, alternatives); + return is_trivial && required_substring_is_prefix && required_substring == needle; + } + static void processString( const char * haystack_data, size_t haystack_length, @@ -124,7 +157,7 @@ struct ReplaceRegexpImpl { std::string_view replacement; if (instr.substitution_num >= 0) - replacement = std::string_view(matches[instr.substitution_num].data(), matches[instr.substitution_num].size()); + replacement = {matches[instr.substitution_num].data(), matches[instr.substitution_num].size()}; else replacement = instr.literal; res_data.resize(res_data.size() + replacement.size()); @@ -179,19 +212,32 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr re2::RE2 searcher(needle, regexp_options); - if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); + /// Try to use non-regexp string replacement. This shortcut is implemented only for const-needles + const-replacement as + /// pattern analysis incurs some cost too. + if (canFallbackToStringReplacement(needle, replacement, searcher, num_captures)) + { + auto convertTrait = [](ReplaceRegexpTraits::Replace first_or_all) + { + switch (first_or_all) + { + case ReplaceRegexpTraits::Replace::First: return ReplaceStringTraits::Replace::First; + case ReplaceRegexpTraits::Replace::All: return ReplaceStringTraits::Replace::All; + } + }; + ReplaceStringImpl::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets); + return; + } + Instructions instructions = createInstructions(replacement, num_captures); - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -221,10 +267,8 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -242,6 +286,7 @@ struct ReplaceRegexpImpl re2::RE2 searcher(needle, regexp_options); if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); + int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); Instructions instructions = createInstructions(replacement, num_captures); @@ -270,17 +315,14 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr re2::RE2 searcher(needle, regexp_options); - if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -290,8 +332,9 @@ struct ReplaceRegexpImpl size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); + std::string_view replacement(repl_data, repl_length); - Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures); + Instructions instructions = createInstructions(replacement, num_captures); processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions); res_offsets[i] = res_offset; @@ -317,10 +360,8 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -338,12 +379,14 @@ struct ReplaceRegexpImpl size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); + std::string_view replacement(repl_data, repl_length); re2::RE2 searcher(needle, regexp_options); if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); + int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures); + Instructions instructions = createInstructions(replacement, num_captures); processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions); res_offsets[i] = res_offset; @@ -367,16 +410,13 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr re2::RE2 searcher(needle, regexp_options); - if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - Instructions instructions = createInstructions(replacement, num_captures); for (size_t i = 0; i < haystack_size; ++i) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 499fe4ce7b2..3e2a3bf6863 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -103,27 +103,40 @@ private: sorted_labels[i].label = label; } - /// Stable sort is required for for labels to apply in same order if score is equal - std::stable_sort(sorted_labels.begin(), sorted_labels.end(), [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; }); + /// Sorting scores in descending order to traverse the ROC curve from left to right + std::sort(sorted_labels.begin(), sorted_labels.end(), [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; }); /// We will first calculate non-normalized area. - size_t area = 0; - size_t count_positive = 0; + Float64 area = 0.0; + Float64 prev_score = sorted_labels[0].score; + size_t prev_fp = 0, prev_tp = 0; + size_t curr_fp = 0, curr_tp = 0; for (size_t i = 0; i < size; ++i) { + // Only increment the area when the score changes + if (sorted_labels[i].score != prev_score) + { + area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; // Trapezoidal area under curve (might degenerate to zero or to a rectangle) + prev_fp = curr_fp; + prev_tp = curr_tp; + prev_score = sorted_labels[i].score; + } + if (sorted_labels[i].label) - ++count_positive; /// The curve moves one step up. No area increase. + curr_tp += 1; /// The curve moves one step up. else - area += count_positive; /// The curve moves one step right. Area is increased by 1 * height = count_positive. + curr_fp += 1; /// The curve moves one step right. } - /// Then divide the area to the area of rectangle. + area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; - if (count_positive == 0 || count_positive == size) + /// Then normalize it dividing by the area to the area of rectangle. + + if (curr_tp == 0 || curr_tp == size) return std::numeric_limits::quiet_NaN(); - return static_cast(area) / count_positive / (size - count_positive); + return area / curr_tp / (size - curr_tp); } static void vector( diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index fa9b3dc92dd..0782f109187 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -13,6 +14,8 @@ #include #include #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include #include #include @@ -28,6 +31,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; } using NullMap = PaddedPODArray; @@ -424,31 +428,21 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { - if constexpr (std::is_same_v) + if (auto res = executeMap(arguments, result_type)) + return res; + + if (auto res = executeArrayLowCardinality(arguments)) + return res; + + auto new_arguments = arguments; + + for (auto & argument : new_arguments) { - if (isMap(arguments[0].type)) - { - auto non_const_map_column = arguments[0].column->convertToFullColumnIfConst(); - - const auto & map_column = assert_cast(*non_const_map_column); - const auto & map_array_column = map_column.getNestedColumn(); - auto offsets = map_array_column.getOffsetsPtr(); - auto keys = map_column.getNestedData().getColumnPtr(0); - auto array_column = ColumnArray::create(keys, offsets); - - const auto & type_map = assert_cast(*arguments[0].type); - auto array_type = std::make_shared(type_map.getKeyType()); - - auto arguments_copy = arguments; - arguments_copy[0].column = std::move(array_column); - arguments_copy[0].type = std::move(array_type); - arguments_copy[0].name = arguments[0].name; - - return executeArrayImpl(arguments_copy, result_type); - } + argument.column = recursiveRemoveLowCardinality(argument.column); + argument.type = recursiveRemoveLowCardinality(argument.type); } - return executeArrayImpl(arguments, result_type); + return executeArrayImpl(new_arguments, result_type); } private: @@ -458,18 +452,6 @@ private: using NullMaps = std::pair; - struct ExecutionData - { - const IColumn& left; - const IColumn& right; - const ColumnArray::Offsets& offsets; - ColumnPtr result_column; - NullMaps maps; - ResultColumnPtr result { ResultColumnType::create() }; - - void moveResult() { result_column = std::move(result); } - }; - static bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) { auto inner_type_decayed = removeNullable(removeLowCardinality(inner_type)); @@ -574,23 +556,13 @@ private: } } -#define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 +#define INTEGRAL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 ColumnPtr executeOnNonNullable(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const { - if (const auto* const left_arr = checkAndGetColumn(arguments[0].column.get())) - { - if (checkAndGetColumn(&left_arr->getData())) - { - if (auto res = executeLowCardinality(arguments)) - return res; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal internal type of first argument of function {}", getName()); - } - } - ColumnPtr res; - if (!((res = executeIntegral(arguments)) + if (!((res = executeNothing(arguments)) + || (res = executeIntegral(arguments)) || (res = executeConst(arguments, result_type)) || (res = executeString(arguments)) || (res = executeGeneric(arguments)))) @@ -599,6 +571,8 @@ private: return res; } +#undef INTEGRAL_PACK + /** * The Array's internal data type may be quite tricky (containing a Nullable type somewhere). To process the * Nullable types correctly, for each data type specialisation we provide two null maps (one for the data and one @@ -627,6 +601,14 @@ private: return {null_map_data, null_map_item}; } + struct ExecutionData + { + const IColumn & left; + const IColumn & right; + const ColumnArray::Offsets & offsets; + NullMaps null_maps; + }; + /** * Given a variadic pack #Integral, apply executeIntegralExpanded with such parameters: * Integral s = {s1, s2, ...} @@ -635,39 +617,33 @@ private: template static ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * const left = checkAndGetColumn(arguments[0].column.get()); - - if (!left) + const auto * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) return nullptr; - const ColumnPtr right_converted_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn& right = *right_converted_ptr.get(); - - ExecutionData data = { - left->getData(), - right, - left->getOffsets(), - nullptr, - getNullMaps(arguments) + ExecutionData data + { + .left = array->getData(), + .right = *arguments[1].column, + .offsets = array->getOffsets(), + .null_maps = getNullMaps(arguments), }; - if (executeIntegral(data)) - return data.result_column; - - return nullptr; + auto result = ResultColumnType::create(); + return executeIntegral(data, *result) ? std::move(result) : nullptr; } template - static bool executeIntegral(ExecutionData& data) + static bool executeIntegral(const ExecutionData & data, ResultColumnType & result) { - return (executeIntegralExpanded(data) || ...); + return (executeIntegralExpanded(data, result) || ...); } /// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ... template - static bool executeIntegralExpanded(ExecutionData& data) + static bool executeIntegralExpanded(const ExecutionData & data, ResultColumnType & result) { - return (executeIntegralImpl(data) || ...); + return (executeIntegralImpl(data, result) || ...); } /** @@ -676,40 +652,31 @@ private: * so we have to check all possible variants for #Initial and #Resulting types. */ template - static bool executeIntegralImpl(ExecutionData& data) + static bool executeIntegralImpl(const ExecutionData & data, ResultColumnType & result) { - const ColumnVector * col_nested = checkAndGetColumn>(&data.left); - - if (!col_nested) + const auto * left_typed = checkAndGetColumn>(&data.left); + if (!left_typed) return false; - const auto [null_map_data, null_map_item] = data.maps; - - if (data.right.onlyNull()) - Impl::Null::process( - data.offsets, - data.result->getData(), - null_map_data); - else if (const auto item_arg_const = checkAndGetColumnConst>(&data.right)) + if (const auto * item_arg_const = checkAndGetColumnConst>(&data.right)) Impl::Main::vector( - col_nested->getData(), + left_typed->getData(), data.offsets, item_arg_const->template getValue(), - data.result->getData(), - null_map_data, + result.getData(), + data.null_maps.first, nullptr); - else if (const auto item_arg_vector = checkAndGetColumn>(&data.right)) + else if (const auto * item_arg_vector = checkAndGetColumn>(&data.right)) Impl::Main::vector( - col_nested->getData(), + left_typed->getData(), data.offsets, item_arg_vector->getData(), - data.result->getData(), - null_map_data, - null_map_item); + result.getData(), + data.null_maps.first, + data.null_maps.second); else return false; - data.moveResult(); return true; } @@ -724,227 +691,161 @@ private: * * Tips and tricks tried can be found at https://github.com/ClickHouse/ClickHouse/pull/12550 . */ - static ColumnPtr executeLowCardinality(const ColumnsWithTypeAndName & arguments) + static ColumnPtr executeArrayLowCardinality(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * const col_array = checkAndGetColumn(arguments[0].column.get()); + const auto * col_array = checkAndGetColumn(arguments[0].column.get()); + const auto * col_array_const = checkAndGetColumnConstData(arguments[0].column.get()); - if (!col_array) + if (!col_array && !col_array_const) return nullptr; - const ColumnLowCardinality * const col_lc = checkAndGetColumn(&col_array->getData()); + if (col_array_const) + col_array = col_array_const; - if (!col_lc) + const auto * left_lc = checkAndGetColumn(&col_array->getData()); + if (!left_lc) return nullptr; - const auto [null_map_data, null_map_item] = getNullMaps(arguments); + const auto * right_const = checkAndGetColumn(arguments[1].column.get()); + if (!right_const) + return nullptr; - if (const ColumnConst * col_arg_const = checkAndGetColumn(&*arguments[1].column)) + const auto & array_type = assert_cast(*arguments[0].type); + const auto target_type = recursiveRemoveLowCardinality(array_type.getNestedType()); + auto right = recursiveRemoveLowCardinality(right_const->getDataColumnPtr()); + + UInt64 index = 0; + UInt64 left_size = arguments[0].column->size(); + ResultColumnPtr col_result = ResultColumnType::create(); + + if (!right->isNullAt(0)) { - const IColumnUnique & col_lc_dict = col_lc->getDictionary(); + auto right_type = recursiveRemoveLowCardinality(arguments[1].type); + right = castColumn({right, right_type, ""}, target_type); - const DataTypeArray * const array_type = checkAndGetDataType(arguments[0].type.get()); - const DataTypePtr target_type_ptr = recursiveRemoveLowCardinality(array_type->getNestedType()); + if (right->isNullable()) + right = checkAndGetColumn(*right).getNestedColumnPtr(); - ColumnPtr col_arg_cloned = castColumn( - {col_arg_const->getDataColumnPtr(), arguments[1].type, arguments[1].name}, target_type_ptr); + StringRef elem = right->getDataAt(0); + const auto & left_dict = left_lc->getDictionary(); - ResultColumnPtr col_result = ResultColumnType::create(); - UInt64 index = 0; - - if (!col_arg_cloned->isNullAt(0)) + if (std::optional maybe_index = left_dict.getOrFindValueIndex(elem); maybe_index) { - if (col_arg_cloned->isNullable()) - col_arg_cloned = checkAndGetColumn(*col_arg_cloned).getNestedColumnPtr(); - - StringRef elem = col_arg_cloned->getDataAt(0); - - if (std::optional maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index) - { - index = *maybe_index; - } - else - { - const size_t offsets_size = col_array->getOffsets().size(); - auto & data = col_result->getData(); - - data.resize_fill(offsets_size); - - return col_result; - } + index = *maybe_index; } - - Impl::Main::vector( - col_lc->getIndexes(), - col_array->getOffsets(), - index, /** Assuming LowCardinality has index of NULL always as zero. */ - col_result->getData(), - null_map_data, - null_map_item); - - return col_result; - } - else if (col_lc->nestedIsNullable()) // LowCardinality(Nullable(T)) and U - { - const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); // Nullable(T) - const ColumnNullable & left_nullable = checkAndGetColumn(*left_casted); - - const NullMap * const null_map_left_casted = &left_nullable.getNullMapColumn().getData(); - - const IColumn & left_ptr = left_nullable.getNestedColumn(); - - const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); - const ColumnNullable * const right_nullable = checkAndGetColumn(right_casted.get()); - - const NullMap * const null_map_right_casted = right_nullable - ? &right_nullable->getNullMapColumn().getData() - : null_map_item; - - const IColumn& right_ptr = right_nullable - ? right_nullable->getNestedColumn() - : *right_casted.get(); - - ExecutionData data = + else { - left_ptr, right_ptr, - col_array->getOffsets(), - nullptr, - {null_map_left_casted, null_map_right_casted}}; + col_result->getData().resize_fill(col_array->size()); - if (dispatchConvertedLowCardinalityColumns(data)) - return data.result_column; + if (col_array_const) + return ColumnConst::create(std::move(col_result), left_size); + + return col_result; + } } - else // LowCardinality(T) and U, T not Nullable - { - if (arguments[1].column->isNullable()) - return nullptr; - - if (const auto* const arg_lc = checkAndGetColumn(arguments[1].column.get()); - arg_lc && arg_lc->isNullable()) - return nullptr; - - // LowCardinality(T) and U (possibly LowCardinality(V)) - - const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); - const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); - - ExecutionData data = - { - *left_casted.get(), *right_casted.get(), col_array->getOffsets(), - nullptr, {null_map_data, null_map_item} - }; - - if (dispatchConvertedLowCardinalityColumns(data)) - return data.result_column; - } - - return nullptr; - } - - static bool dispatchConvertedLowCardinalityColumns(ExecutionData & data) - { - if (data.left.isNumeric() && data.right.isNumeric()) // ColumnArrays - return executeIntegral(data); - - if (checkAndGetColumn(&data.left)) - return executeStringImpl(data); Impl::Main::vector( - data.left, - data.offsets, data.right, - data.result->getData(), - data.maps.first, data.maps.second); + left_lc->getIndexes(), + col_array->getOffsets(), + index, /** Assuming LowCardinality has index of NULL always as zero. */ + col_result->getData(), + nullptr, + nullptr); - data.moveResult(); - return true; + if (col_array_const) + return ColumnConst::create(std::move(col_result), left_size); + + return col_result; } -#undef INTEGRAL_TPL_PACK + ColumnPtr executeMap(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const + { + if constexpr (!std::is_same_v) + return nullptr; + + if (!isMap(arguments[0].type)) + return nullptr; + + auto non_const_map_column = arguments[0].column->convertToFullColumnIfConst(); + + const auto & map_column = assert_cast(*non_const_map_column); + const auto & map_array_column = map_column.getNestedColumn(); + auto offsets = map_array_column.getOffsetsPtr(); + auto keys = map_column.getNestedData().getColumnPtr(0); + auto array_column = ColumnArray::create(keys, offsets); + + const auto & type_map = assert_cast(*arguments[0].type); + auto array_type = std::make_shared(type_map.getKeyType()); + + auto arguments_copy = arguments; + arguments_copy[0].column = std::move(array_column); + arguments_copy[0].type = std::move(array_type); + arguments_copy[0].name = arguments[0].name; + + return executeArrayImpl(arguments_copy, result_type); + } static ColumnPtr executeString(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); - + const auto * array = checkAndGetColumn(arguments[0].column.get()); if (!array) return nullptr; - const ColumnString * left = checkAndGetColumn(&array->getData()); - + const auto * left = checkAndGetColumn(&array->getData()); if (!left) return nullptr; - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn & right = *right_ptr.get(); + const auto & right = *arguments[1].column; + const auto [null_map_data, null_map_item] = getNullMaps(arguments); - ExecutionData data = { - *left, right, array->getOffsets(), - nullptr, getNullMaps(arguments), - std::move(ResultColumnType::create()) - }; + auto result = ResultColumnType::create(); - if (executeStringImpl(data)) - return data.result_column; - - return nullptr; - } - - static bool executeStringImpl(ExecutionData& data) - { - const auto [null_map_data, null_map_item] = data.maps; - const ColumnString& left = *typeid_cast(&data.left); - - if (data.right.onlyNull()) - Impl::Null::process( - data.offsets, - data.result->getData(), - null_map_data); - else if (const auto *const item_arg_const = checkAndGetColumnConstStringOrFixedString(&data.right)) + if (const auto * item_arg_const = checkAndGetColumnConstStringOrFixedString(&right)) { - const ColumnString * item_const_string = - checkAndGetColumn(&item_arg_const->getDataColumn()); - - const ColumnFixedString * item_const_fixedstring = - checkAndGetColumn(&item_arg_const->getDataColumn()); + const auto * item_const_string = checkAndGetColumn(&item_arg_const->getDataColumn()); + const auto * item_const_fixedstring = checkAndGetColumn(&item_arg_const->getDataColumn()); if (item_const_string) Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_const_string->getChars(), item_const_string->getDataAt(0).size, - data.result->getData(), + result->getData(), null_map_data, null_map_item); else if (item_const_fixedstring) Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_const_fixedstring->getChars(), item_const_fixedstring->getN(), - data.result->getData(), + result->getData(), null_map_data, null_map_item); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Logical error: ColumnConst contains not String nor FixedString column"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnConst contains not String nor FixedString column"); } - else if (const auto *const item_arg_vector = checkAndGetColumn(&data.right)) + else if (const auto * item_arg_vector = checkAndGetColumn(&right)) { Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_arg_vector->getChars(), item_arg_vector->getOffsets(), - data.result->getData(), + result->getData(), null_map_data, null_map_item); } else - return false; + { + return nullptr; + } - data.moveResult(); - return true; + return result; } static ColumnPtr executeConst(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) @@ -955,9 +856,7 @@ private: return nullptr; Array arr = col_array->getValue(); - - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn * item_arg = right_ptr.get(); + const IColumn * item_arg = arguments[1].column.get(); if (isColumnConst(*item_arg)) { @@ -1026,48 +925,59 @@ private: } } + static ColumnPtr executeNothing(const ColumnsWithTypeAndName & arguments) + { + const auto * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) + return nullptr; + + if (arguments[1].column->onlyNull()) + { + auto result = ResultColumnType::create(); + Impl::Null::process(array->getOffsets(), result->getData(), getNullMaps(arguments).first); + return result; + } + + return nullptr; + } + static ColumnPtr executeGeneric(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * col = checkAndGetColumn(arguments[0].column.get()); - - if (!col) + const auto * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) return nullptr; DataTypePtr array_elements_type = assert_cast(*arguments[0].type).getNestedType(); const DataTypePtr & index_type = arguments[1].type; - DataTypePtr common_type = getLeastSupertype(DataTypes{array_elements_type, index_type}); - - ColumnPtr col_nested = castColumn({ col->getDataPtr(), array_elements_type, "" }, common_type); - - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - ColumnPtr item_arg = castColumn({ right_ptr, removeLowCardinality(index_type), "" }, common_type); + DataTypePtr common_type = getLeastSupertype(DataTypes{array_elements_type, arguments[1].type}); + ColumnPtr col_nested = castColumn({ col_array->getDataPtr(), array_elements_type, "" }, common_type); + ColumnPtr item_arg = castColumn({ arguments[1].column, removeLowCardinality(index_type), "" }, common_type); auto col_res = ResultColumnType::create(); auto [null_map_data, null_map_item] = getNullMaps(arguments); - if (item_arg->onlyNull()) - Impl::Null::process( - col->getOffsets(), - col_res->getData(), - null_map_data); - else if (isColumnConst(*item_arg)) + if (const auto * item_arg_const = checkAndGetColumn(item_arg.get())) + { Impl::Main::vector( *col_nested, - col->getOffsets(), - typeid_cast(*item_arg).getDataColumn(), + col_array->getOffsets(), + item_arg_const->getDataColumn(), col_res->getData(), /// TODO This is wrong. null_map_data, nullptr); + } else + { Impl::Main::vector( *col_nested, - col->getOffsets(), + col_array->getOffsets(), *item_arg, col_res->getData(), null_map_data, null_map_item); + } return col_res; } diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index c83195bc976..71f50abba6c 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -5,7 +5,17 @@ namespace DB REGISTER_FUNCTION(Tuple) { - factory.registerFunction(); + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns a tuple by grouping input arguments. + +For columns C1, C2, ... with the types T1, T2, ..., it returns a named Tuple(C1 T1, C2 T2, ...) type tuple containing these columns if their names are unique and can be treated as unquoted identifiers, otherwise a Tuple(T1, T2, ...) is returned. There is no cost to execute the function. +Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. + +The function implements the operator `(x, y, ...)`. +)", + .examples{{"typical", "SELECT tuple(1, 2)", "(1,2)"}}, + .categories{"Miscellaneous"}}); } } diff --git a/src/Functions/tuple.h b/src/Functions/tuple.h index 8b3e041f781..94529d86861 100644 --- a/src/Functions/tuple.h +++ b/src/Functions/tuple.h @@ -6,20 +6,28 @@ #include #include #include +#include namespace DB { -/** tuple(x, y, ...) is a function that allows you to group several columns +/** tuple(x, y, ...) is a function that allows you to group several columns. * tupleElement(tuple, n) is a function that allows you to retrieve a column from tuple. */ class FunctionTuple : public IFunction { + bool enable_named_columns; + public: static constexpr auto name = "tuple"; /// maybe_unused: false-positive - [[ maybe_unused ]] static FunctionPtr create(ContextPtr) { return std::make_shared(); } + [[maybe_unused]] static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context->getSettingsRef().enable_named_columns_in_function_tuple); + } + + explicit FunctionTuple(bool enable_named_columns_ = false) : enable_named_columns(enable_named_columns_) { } String getName() const override { return name; } @@ -38,9 +46,26 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - return std::make_shared(arguments); + if (arguments.empty()) + return std::make_shared(DataTypes{}); + + DataTypes types; + Names names; + NameSet name_set; + for (const auto & argument : arguments) + { + types.emplace_back(argument.type); + names.emplace_back(argument.name); + name_set.emplace(argument.name); + } + + if (enable_named_columns && name_set.size() == names.size() + && std::all_of(names.cbegin(), names.cend(), [](const auto & n) { return isUnquotedIdentifier(n); })) + return std::make_shared(types, names); + else + return std::make_shared(types); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override @@ -53,9 +78,9 @@ public: for (size_t i = 0; i < tuple_size; ++i) { /** If tuple is mixed of constant and not constant columns, - * convert all to non-constant columns, - * because many places in code expect all non-constant columns in non-constant tuple. - */ + * convert all to non-constant columns, + * because many places in code expect all non-constant columns in non-constant tuple. + */ tuple_columns[i] = arguments[i].column->convertToFullColumnIfConst(); } return ColumnTuple::create(tuple_columns); diff --git a/src/Functions/tupleNames.cpp b/src/Functions/tupleNames.cpp new file mode 100644 index 00000000000..e444478c224 --- /dev/null +++ b/src/Functions/tupleNames.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/** Transform a named tuple into names, which is a constant array of strings. + */ +class ExecutableFunctionTupleNames : public IExecutableFunction +{ +public: + static constexpr auto name = "tupleNames"; + + explicit ExecutableFunctionTupleNames(Array name_fields_) : name_fields(std::move(name_fields_)) { } + + String getName() const override { return name; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return result_type->createColumnConst(input_rows_count, name_fields); + } + +private: + Array name_fields; +}; + +class FunctionBaseTupleNames : public IFunctionBase +{ +public: + static constexpr auto name = "tupleNames"; + + explicit FunctionBaseTupleNames(DataTypePtr argument_type, DataTypePtr result_type_, Array name_fields_) + : argument_types({std::move(argument_type)}), result_type(std::move(result_type_)), name_fields(std::move(name_fields_)) + { + } + + String getName() const override { return name; } + + bool isSuitableForConstantFolding() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + const DataTypes & getArgumentTypes() const override { return argument_types; } + + const DataTypePtr & getResultType() const override { return result_type; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique(name_fields); + } + +private: + DataTypes argument_types; + DataTypePtr result_type; + Array name_fields; +}; + +class TupleNamesOverloadResolver : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = "tupleNames"; + + static FunctionOverloadResolverPtr create(ContextPtr) { return std::make_unique(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const DataTypeTuple * tuple = checkAndGetDataType(arguments[0].type.get()); + + if (!tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a tuple", getName()); + + return std::make_shared(std::make_shared()); + } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override + { + const DataTypeTuple * tuple = checkAndGetDataType(arguments[0].type.get()); + + if (!tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a tuple", getName()); + + DataTypes types = tuple->getElements(); + Array name_fields; + for (const auto & elem_name : tuple->getElementNames()) + name_fields.emplace_back(elem_name); + + return std::make_unique(arguments[0].type, result_type, std::move(name_fields)); + } +}; + +} + +REGISTER_FUNCTION(TupleNames) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Converts a tuple into an array of column names. For a tuple in the form `Tuple(a T, b T, ...)`, it returns an array of strings representing the named columns of the tuple. If the tuple elements do not have explicit names, their indices will be used as the column names instead. +)", + .examples{{"typical", "SELECT tupleNames(tuple(1 as a, 2 as b))", "['a','b']"}}, + .categories{"Miscellaneous"}}); +} + +} diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 0ec733f7840..da38bccdea1 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -44,7 +44,7 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; - [[maybe_unused]] bool allow_gwp_asan_force_sample; + [[maybe_unused]] bool allow_gwp_asan_force_sample{false}; Memory() = default; diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 303ffb744b5..b753e66da48 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -713,8 +713,12 @@ ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::getFileInfo() /// fall back to slow whole-file reads when HEAD is actually supported; that sounds /// like a nightmare to debug.) if (e.getHTTPStatus() >= 400 && e.getHTTPStatus() <= 499 && - e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS) + e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS && + e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_REQUEST_TIMEOUT && + e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_MISDIRECTED_REQUEST) + { return HTTPFileInfo{}; + } throw; } diff --git a/src/IO/examples/CMakeLists.txt b/src/IO/examples/CMakeLists.txt index 12b85c483a1..fc9d9c7dcd1 100644 --- a/src/IO/examples/CMakeLists.txt +++ b/src/IO/examples/CMakeLists.txt @@ -59,10 +59,10 @@ clickhouse_add_executable (parse_date_time_best_effort parse_date_time_best_effo target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io) clickhouse_add_executable (zlib_ng_bug zlib_ng_bug.cpp) -target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib) +target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib clickhouse_common_io) clickhouse_add_executable (dragonbox_test dragonbox_test.cpp) -target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars) +target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars clickhouse_common_io) clickhouse_add_executable (zstd_buffers zstd_buffers.cpp) target_link_libraries (zstd_buffers PRIVATE clickhouse_common_io) diff --git a/src/Interpreters/Access/InterpreterSetRoleQuery.cpp b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp index 24467923542..99a7a73d46c 100644 --- a/src/Interpreters/Access/InterpreterSetRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp @@ -29,33 +29,12 @@ BlockIO InterpreterSetRoleQuery::execute() void InterpreterSetRoleQuery::setRole(const ASTSetRoleQuery & query) { - auto & access_control = getContext()->getAccessControl(); auto session_context = getContext()->getSessionContext(); - auto user = session_context->getUser(); if (query.kind == ASTSetRoleQuery::Kind::SET_ROLE_DEFAULT) - { session_context->setCurrentRolesDefault(); - } else - { - RolesOrUsersSet roles_from_query{*query.roles, access_control}; - std::vector new_current_roles; - if (roles_from_query.all) - { - new_current_roles = user->granted_roles.findGranted(roles_from_query); - } - else - { - for (const auto & id : roles_from_query.getMatchingIDs()) - { - if (!user->granted_roles.isGranted(id)) - throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role should be granted to set current"); - new_current_roles.emplace_back(id); - } - } - session_context->setCurrentRoles(new_current_roles); - } + session_context->setCurrentRoles(RolesOrUsersSet{*query.roles, session_context->getAccessControl()}); } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index c3285d73145..9efb1d89a47 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -405,10 +405,6 @@ Block createBlockForSet( } -ScopeStack::Level::Level() = default; -ScopeStack::Level::~Level() = default; -ScopeStack::Level::Level(Level &&) noexcept = default; - FutureSetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets) { @@ -462,6 +458,7 @@ public: for (const auto * node : index) map.emplace(node->result_name, node); } + ~Index() = default; void addNode(const ActionsDAG::Node * node) { @@ -502,6 +499,10 @@ public: } }; +ScopeStack::Level::Level() = default; +ScopeStack::Level::~Level() = default; +ScopeStack::Level::Level(Level &&) noexcept = default; + ActionsMatcher::Data::Data( ContextPtr context_, SizeLimits set_size_limit_, diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 45b43ae2d3a..e073b7a49b6 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -78,115 +77,6 @@ namespace ErrorCodes namespace { -/** Collects observed HashMap-s sizes to avoid redundant intermediate resizes. - */ -class HashTablesStatistics -{ -public: - struct Entry - { - size_t sum_of_sizes; // used to determine if it's better to convert aggregation to two-level from the beginning - size_t median_size; // roughly the size we're going to preallocate on each thread - }; - - using Cache = DB::CacheBase; - using CachePtr = std::shared_ptr; - using Params = DB::Aggregator::Params::StatsCollectingParams; - - /// Collection and use of the statistics should be enabled. - std::optional getSizeHint(const Params & params) - { - if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); - - std::lock_guard lock(mutex); - const auto cache = getHashTableStatsCache(params, lock); - if (const auto hint = cache->get(params.key)) - { - LOG_TRACE( - getLogger("Aggregator"), - "An entry for key={} found in cache: sum_of_sizes={}, median_size={}", - params.key, - hint->sum_of_sizes, - hint->median_size); - return *hint; - } - return std::nullopt; - } - - /// Collection and use of the statistics should be enabled. - void update(size_t sum_of_sizes, size_t median_size, const Params & params) - { - if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); - - std::lock_guard lock(mutex); - const auto cache = getHashTableStatsCache(params, lock); - const auto hint = cache->get(params.key); - // We'll maintain the maximum among all the observed values until the next prediction turns out to be too wrong. - if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2 - || hint->median_size < median_size) - { - LOG_TRACE( - getLogger("Aggregator"), - "Statistics updated for key={}: new sum_of_sizes={}, median_size={}", - params.key, - sum_of_sizes, - median_size); - cache->set(params.key, std::make_shared(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size})); - } - } - - std::optional getCacheStats() const - { - std::lock_guard lock(mutex); - if (hash_table_stats) - { - size_t hits = 0, misses = 0; - hash_table_stats->getStats(hits, misses); - return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses}; - } - return std::nullopt; - } - - static size_t calculateCacheKey(const DB::ASTPtr & select_query) - { - if (!select_query) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null"); - - const auto & select = select_query->as(); - - // It may happen in some corner cases like `select 1 as num group by num`. - if (!select.tables()) - return 0; - - SipHash hash; - hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/ true)); - if (const auto where = select.where()) - hash.update(where->getTreeHash(/*ignore_aliases=*/ true)); - if (const auto group_by = select.groupBy()) - hash.update(group_by->getTreeHash(/*ignore_aliases=*/ true)); - return hash.get64(); - } - -private: - CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard &) - { - if (!hash_table_stats || hash_table_stats->maxSizeInBytes() != params.max_entries_for_hash_table_stats) - hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats); - return hash_table_stats; - } - - mutable std::mutex mutex; - CachePtr hash_table_stats; -}; - -HashTablesStatistics & getHashTablesStatistics() -{ - static HashTablesStatistics hash_tables_stats; - return hash_tables_stats; -} - bool worthConvertToTwoLevel( size_t group_by_two_level_threshold, size_t result_size, size_t group_by_two_level_threshold_bytes, auto result_size_bytes) { @@ -215,49 +105,29 @@ void initDataVariantsWithSizeHint( DB::AggregatedDataVariants & result, DB::AggregatedDataVariants::Type method_chosen, const DB::Aggregator::Params & params) { const auto & stats_collecting_params = params.stats_collecting_params; - if (stats_collecting_params.isCollectionAndUseEnabled()) + const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1; + if (auto hint = getSizeHint(stats_collecting_params, /*tables_cnt=*/max_threads)) { - if (auto hint = getHashTablesStatistics().getSizeHint(stats_collecting_params)) - { - const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1; - const auto lower_limit = hint->sum_of_sizes / max_threads; - const auto upper_limit = stats_collecting_params.max_size_to_preallocate_for_aggregation / max_threads; - if (hint->median_size > upper_limit) - { - /// Since we cannot afford to preallocate as much as we want, we will likely need to do resize anyway. - /// But we will also work with the big (i.e. not so cache friendly) HT from the beginning which may result in a slight slowdown. - /// So let's just do nothing. - LOG_TRACE( - getLogger("Aggregator"), - "No space were preallocated in hash tables because 'max_size_to_preallocate_for_aggregation' has too small value: {}, " - "should be at least {}", - stats_collecting_params.max_size_to_preallocate_for_aggregation, - hint->median_size * max_threads); - } - /// https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703 - else if ((max_threads > 1 && hint->sum_of_sizes > 100'000) || hint->sum_of_sizes > 500'000) - { - const auto adjusted = std::max(lower_limit, hint->median_size); - if (worthConvertToTwoLevel( - params.group_by_two_level_threshold, - hint->sum_of_sizes, - /*group_by_two_level_threshold_bytes*/ 0, - /*result_size_bytes*/ 0)) - method_chosen = convertToTwoLevelTypeIfPossible(method_chosen); - result.init(method_chosen, adjusted); - ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel()); - return; - } - } + if (worthConvertToTwoLevel( + params.group_by_two_level_threshold, + hint->sum_of_sizes, + /*group_by_two_level_threshold_bytes*/ 0, + /*result_size_bytes*/ 0)) + method_chosen = convertToTwoLevelTypeIfPossible(method_chosen); + result.init(method_chosen, hint->median_size); } - result.init(method_chosen); + else + { + result.init(method_chosen); + } + ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel()); } /// Collection and use of the statistics should be enabled. -void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::Aggregator::Params::StatsCollectingParams & params) +void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::StatsCollectingParams & params) { if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + return; std::vector sizes(data_variants.size()); for (size_t i = 0; i < data_variants.size(); ++i) @@ -265,7 +135,7 @@ void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, cons const auto median_size = sizes.begin() + sizes.size() / 2; // not precisely though... std::nth_element(sizes.begin(), median_size, sizes.end()); const auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull); - getHashTablesStatistics().update(sum_of_sizes, *median_size, params); + DB::getHashTablesStatistics().update(sum_of_sizes, *median_size, params); } DB::ColumnNumbers calculateKeysPositions(const DB::Block & header, const DB::Aggregator::Params & params) @@ -300,24 +170,6 @@ size_t getMinBytesForPrefetch() namespace DB { -std::optional getHashTablesCacheStatistics() -{ - return getHashTablesStatistics().getCacheStats(); -} - -Aggregator::Params::StatsCollectingParams::StatsCollectingParams() = default; - -Aggregator::Params::StatsCollectingParams::StatsCollectingParams( - const ASTPtr & select_query_, - bool collect_hash_table_stats_during_aggregation_, - size_t max_entries_for_hash_table_stats_, - size_t max_size_to_preallocate_for_aggregation_) - : key(collect_hash_table_stats_during_aggregation_ ? HashTablesStatistics::calculateCacheKey(select_query_) : 0) - , max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_) - , max_size_to_preallocate_for_aggregation(max_size_to_preallocate_for_aggregation_) -{ -} - Block Aggregator::getHeader(bool final) const { return params.getHeader(header, final); @@ -2783,8 +2635,7 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData LOG_TRACE(log, "Merging aggregated data"); - if (params.stats_collecting_params.isCollectionAndUseEnabled()) - updateStatistics(data_variants, params.stats_collecting_params); + updateStatistics(data_variants, params.stats_collecting_params); ManyAggregatedDataVariants non_empty_data; non_empty_data.reserve(data_variants.size()); @@ -3486,4 +3337,23 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) cons throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } +UInt64 calculateCacheKey(const DB::ASTPtr & select_query) +{ + if (!select_query) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null"); + + const auto & select = select_query->as(); + + // It may happen in some corner cases like `select 1 as num group by num`. + if (!select.tables()) + return 0; + + SipHash hash; + hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/true)); + if (const auto where = select.where()) + hash.update(where->getTreeHash(/*ignore_aliases=*/true)); + if (const auto group_by = select.groupBy()) + hash.update(group_by->getTreeHash(/*ignore_aliases=*/true)); + return hash.get64(); +} } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 406d28597cf..f4f1e9a1df3 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -39,9 +39,10 @@ #include -#include #include #include +#include +#include namespace DB { @@ -128,24 +129,6 @@ public: const double min_hit_rate_to_use_consecutive_keys_optimization; - struct StatsCollectingParams - { - StatsCollectingParams(); - - StatsCollectingParams( - const ASTPtr & select_query_, - bool collect_hash_table_stats_during_aggregation_, - size_t max_entries_for_hash_table_stats_, - size_t max_size_to_preallocate_for_aggregation_); - - bool isCollectionAndUseEnabled() const { return key != 0; } - void disable() { key = 0; } - - UInt64 key = 0; - const size_t max_entries_for_hash_table_stats = 0; - const size_t max_size_to_preallocate_for_aggregation = 0; - }; - StatsCollectingParams stats_collecting_params; Params( @@ -674,6 +657,7 @@ private: Arena * arena); }; +UInt64 calculateCacheKey(const DB::ASTPtr & select_query); /** Get the aggregation variant by its type. */ template Method & getDataVariant(AggregatedDataVariants & variants); @@ -685,13 +669,4 @@ APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M - -struct HashTablesCacheStatistics -{ - size_t entries = 0; - size_t hits = 0; - size_t misses = 0; -}; - -std::optional getHashTablesCacheStatistics(); } diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index dd1166a9228..56055e7044c 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,13 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter( - query, - query_context, - query_context->getSettingsRef().insert_allow_materialized_columns, - /* no_squash */ false, - /* no_destination */ false, - /* async_insert */ false); + InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); @@ -732,7 +726,10 @@ try /// Access rights must be checked for the user who executed the initial INSERT query. if (key.user_id) - insert_context->setUser(*key.user_id, key.current_roles); + { + insert_context->setUser(*key.user_id); + insert_context->setCurrentRoles(key.current_roles); + } insert_context->setSettings(key.settings); @@ -787,12 +784,7 @@ try try { interpreter = std::make_unique( - key.query, - insert_context, - key.settings.insert_allow_materialized_columns, - false, - false, - true); + key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); @@ -1011,7 +1003,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( } Chunk chunk(executor.getResultColumns(), total_rows); - chunk.getChunkInfos().add(std::move(chunk_info)); + chunk.setChunkInfo(std::move(chunk_info)); return chunk; } @@ -1063,7 +1055,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( } Chunk chunk(std::move(result_columns), total_rows); - chunk.getChunkInfos().add(std::move(chunk_info)); + chunk.setChunkInfo(std::move(chunk_info)); return chunk; } diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index a5c8f3c0cf4..0e85ead3265 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -1,11 +1,16 @@ #pragma once + + #include +#include #include #include #include #include #include #include + +#include #include namespace DB @@ -30,7 +35,7 @@ struct FileSegmentMetadata : private boost::noncopyable explicit FileSegmentMetadata(FileSegmentPtr && file_segment_); - bool releasable() const { return file_segment.unique(); } + bool releasable() const { return isSharedPtrUnique(file_segment); } size_t size() const; diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 91c0c592f28..5d56ef09127 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -8,23 +8,28 @@ #include #include #include +#include #include -#include -#include #include +#include +#include +#include +#include +#include #include +#include #include #include -#include #include +#include #include +#include #include #include #include -#include +#include #include -#include -#include +#include namespace DB { @@ -33,7 +38,6 @@ namespace ErrorCodes { extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int LOGICAL_ERROR; - extern const int CLUSTER_DOESNT_EXIST; extern const int UNEXPECTED_CLUSTER; } @@ -172,7 +176,7 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, /// in case of parallel replicas custom key use round robing load balancing /// so custom key partitions will be spread over nodes in round-robin fashion - if (context->canUseParallelReplicasCustomKey(cluster) && !settings.load_balancing.changed) + if (context->canUseParallelReplicasCustomKeyForCluster(cluster) && !settings.load_balancing.changed) { new_settings.load_balancing = LoadBalancing::ROUND_ROBIN; } @@ -180,6 +184,10 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); new_context->setClientInfo(new_client_info); + + if (context->canUseParallelReplicasCustomKeyForCluster(cluster)) + new_context->disableOffsetParallelReplicas(); + return new_context; } @@ -220,6 +228,35 @@ static ThrottlerPtr getThrottler(const ContextPtr & context) return throttler; } +AdditionalShardFilterGenerator +getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns) +{ + if (!context->canUseParallelReplicasCustomKeyForCluster(cluster)) + return {}; + + const auto & settings = context->getSettingsRef(); + auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *context); + if (custom_key_ast == nullptr) + return {}; + + return [my_custom_key_ast = std::move(custom_key_ast), + column_description = columns, + custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, + custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, + custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, + query_context = context, + replica_count = cluster.getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr + { + return getCustomKeyFilterForParallelReplica( + replica_count, + replica_num - 1, + my_custom_key_ast, + {custom_key_type, custom_key_range_lower, custom_key_range_upper}, + column_description, + query_context); + }; +} + void executeQuery( QueryPlan & query_plan, @@ -412,14 +449,7 @@ void executeQueryWithParallelReplicas( const auto & settings = context->getSettingsRef(); /// check cluster for parallel replicas - if (settings.cluster_for_parallel_replicas.value.empty()) - { - throw Exception( - ErrorCodes::CLUSTER_DOESNT_EXIST, - "Reading in parallel from replicas is enabled but cluster to execute query is not provided. Please set " - "'cluster_for_parallel_replicas' setting"); - } - auto not_optimized_cluster = context->getCluster(settings.cluster_for_parallel_replicas); + auto not_optimized_cluster = context->getClusterForParallelReplicas(); auto new_context = Context::createCopy(context); @@ -542,6 +572,84 @@ void executeQueryWithParallelReplicas( executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits); } +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const Block & header, + ContextPtr context) +{ + /// Return directly (with correct header) if no shard to query. + if (query_info.getCluster()->getShardsInfo().empty()) + { + if (context->getSettingsRef().allow_experimental_analyzer) + return; + + Pipe pipe(std::make_shared(header)); + auto read_from_pipe = std::make_unique(std::move(pipe)); + read_from_pipe->setStepDescription("Read from NullSource (Distributed)"); + query_plan.addStep(std::move(read_from_pipe)); + return; + } + + ColumnsDescriptionByShardNum columns_object; + if (hasDynamicSubcolumns(columns)) + columns_object = getExtendedObjectsOfRemoteTables(*query_info.cluster, storage_id, columns, context); + + ClusterProxy::SelectStreamFactory select_stream_factory + = ClusterProxy::SelectStreamFactory(header, columns_object, snapshot, processed_stage); + + auto shard_filter_generator = getShardFilterGeneratorForCustomKey(*query_info.getCluster(), context, columns); + + ClusterProxy::executeQuery( + query_plan, + header, + processed_stage, + storage_id, + /*table_func_ptr=*/nullptr, + select_stream_factory, + getLogger("executeQueryWithParallelReplicasCustomKey"), + context, + query_info, + /*sharding_key_expr=*/nullptr, + /*sharding_key_column_name=*/{}, + /*distributed_settings=*/{}, + shard_filter_generator, + /*is_remote_function=*/false); +} + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const QueryTreeNodePtr & query_tree, + ContextPtr context) +{ + auto header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_tree, context, SelectQueryOptions(processed_stage).analyze()); + executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context); +} + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + SelectQueryInfo query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const ASTPtr & query_ast, + ContextPtr context) +{ + auto header = InterpreterSelectQuery(query_ast, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + query_info.query = ClusterProxy::rewriteSelectQuery( + context, query_info.query, storage_id.getDatabaseName(), storage_id.getTableName(), /*table_function_ptr=*/nullptr); + executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context); +} } } diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 6548edf8939..c22fcd24f03 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include namespace DB @@ -13,6 +13,11 @@ class Cluster; using ClusterPtr = std::shared_ptr; struct SelectQueryInfo; +class ColumnsDescription; +struct StorageSnapshot; + +using StorageSnapshotPtr = std::shared_ptr; + class Pipe; class QueryPlan; @@ -47,6 +52,9 @@ class SelectStreamFactory; ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table); using AdditionalShardFilterGenerator = std::function; +AdditionalShardFilterGenerator +getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns); + /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. /// `stream_factory` object encapsulates the logic of creating plans for a different type of query /// (currently SELECT, DESCRIBE). @@ -91,6 +99,36 @@ void executeQueryWithParallelReplicas( const PlannerContextPtr & planner_context, ContextPtr context, std::shared_ptr storage_limits); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const Block & header, + ContextPtr context); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const QueryTreeNodePtr & query_tree, + ContextPtr context); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + SelectQueryInfo query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const ASTPtr & query_ast, + ContextPtr context); } } diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 53987694e46..4493a9f4dbd 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -16,12 +17,18 @@ #include #include #include +#include #include #include #include #include #include +namespace ProfileEvents +{ +extern const Event HashJoinPreallocatedElementsInHashTables; +} + namespace CurrentMetrics { extern const Metric ConcurrentHashJoinPoolThreads; @@ -29,6 +36,25 @@ extern const Metric ConcurrentHashJoinPoolThreadsActive; extern const Metric ConcurrentHashJoinPoolThreadsScheduled; } +namespace +{ + +void updateStatistics(const auto & hash_joins, const DB::StatsCollectingParams & params) +{ + if (!params.isCollectionAndUseEnabled()) + return; + + std::vector sizes(hash_joins.size()); + for (size_t i = 0; i < hash_joins.size(); ++i) + sizes[i] = hash_joins[i]->data->getTotalRowCount(); + const auto median_size = sizes.begin() + sizes.size() / 2; // not precisely though... + std::nth_element(sizes.begin(), median_size, sizes.end()); + if (auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull)) + DB::getHashTablesStatistics().update(sum_of_sizes, *median_size, params); +} + +} + namespace DB { @@ -46,7 +72,12 @@ static UInt32 toPowerOfTwo(UInt32 x) } ConcurrentHashJoin::ConcurrentHashJoin( - ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_) + ContextPtr context_, + std::shared_ptr table_join_, + size_t slots_, + const Block & right_sample_block, + const StatsCollectingParams & stats_collecting_params_, + bool any_take_last_row_) : context(context_) , table_join(table_join_) , slots(toPowerOfTwo(std::min(static_cast(slots_), 256))) @@ -55,6 +86,7 @@ ConcurrentHashJoin::ConcurrentHashJoin( CurrentMetrics::ConcurrentHashJoinPoolThreadsActive, CurrentMetrics::ConcurrentHashJoinPoolThreadsScheduled, slots)) + , stats_collecting_params(stats_collecting_params_) { hash_joins.resize(slots); @@ -74,9 +106,14 @@ ConcurrentHashJoin::ConcurrentHashJoin( CurrentThread::attachToGroupIfDetached(thread_group); setThreadName("ConcurrentJoin"); + size_t reserve_size = 0; + if (auto hint = getSizeHint(stats_collecting_params, slots)) + reserve_size = hint->median_size; + ProfileEvents::increment(ProfileEvents::HashJoinPreallocatedElementsInHashTables, reserve_size); + auto inner_hash_join = std::make_shared(); inner_hash_join->data = std::make_unique( - table_join_, right_sample_block, any_take_last_row_, 0, fmt::format("concurrent{}", idx)); + table_join_, right_sample_block, any_take_last_row_, reserve_size, fmt::format("concurrent{}", idx)); /// Non zero `max_joined_block_rows` allows to process block partially and return not processed part. /// TODO: It's not handled properly in ConcurrentHashJoin case, so we set it to 0 to disable this feature. inner_hash_join->data->setMaxJoinedBlockRows(0); @@ -97,6 +134,8 @@ ConcurrentHashJoin::~ConcurrentHashJoin() { try { + updateStatistics(hash_joins, stats_collecting_params); + for (size_t i = 0; i < slots; ++i) { // Hash tables destruction may be very time-consuming. @@ -300,4 +339,16 @@ Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, cons return result; } +UInt64 calculateCacheKey(std::shared_ptr & table_join, const QueryTreeNodePtr & right_table_expression) +{ + IQueryTreeNode::HashState hash; + chassert(right_table_expression); + hash.update(right_table_expression->getTreeHash()); + chassert(table_join && table_join->oneDisjunct()); + const auto keys + = NameOrderedSet{table_join->getClauses().at(0).key_names_right.begin(), table_join->getClauses().at(0).key_names_right.end()}; + for (const auto & name : keys) + hash.update(name); + return hash.get64(); +} } diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index efac648214e..a911edaccc3 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -3,8 +3,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -38,6 +40,7 @@ public: std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, + const StatsCollectingParams & stats_collecting_params_, bool any_take_last_row_ = false); ~ConcurrentHashJoin() override; @@ -70,6 +73,8 @@ private: std::unique_ptr pool; std::vector> hash_joins; + StatsCollectingParams stats_collecting_params; + std::mutex totals_mutex; Block totals; @@ -77,4 +82,5 @@ private: Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block); }; +UInt64 calculateCacheKey(std::shared_ptr & table_join, const QueryTreeNodePtr & right_table_expression); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b946c2cb21e..f2626696492 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -190,6 +191,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int CLUSTER_DOESNT_EXIST; + extern const int SET_NON_GRANTED_ROLE; } #define SHUTDOWN(log, desc, ptr, method) do \ @@ -1303,7 +1305,7 @@ ConfigurationPtr Context::getUsersConfig() return shared->users_config; } -void Context::setUser(const UUID & user_id_, const std::optional> & current_roles_) +void Context::setUser(const UUID & user_id_) { /// Prepare lists of user's profiles, constraints, settings, roles. /// NOTE: AccessControl::read() and other AccessControl's functions may require some IO work, @@ -1312,8 +1314,8 @@ void Context::setUser(const UUID & user_id_, const std::optional(user_id_); - auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles); - auto enabled_roles = access_control.getEnabledRolesInfo(new_current_roles, {}); + auto default_roles = user->granted_roles.findGranted(user->default_roles); + auto enabled_roles = access_control.getEnabledRolesInfo(default_roles, {}); auto enabled_profiles = access_control.getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles); const auto & database = user->default_database; @@ -1327,7 +1329,7 @@ void Context::setUser(const UUID & user_id_, const std::optional Context::getUserID() const return user_id; } -void Context::setCurrentRolesWithLock(const std::vector & current_roles_, const std::lock_guard &) +void Context::setCurrentRolesWithLock(const std::vector & new_current_roles, const std::lock_guard &) { - if (current_roles_.empty()) + if (new_current_roles.empty()) current_roles = nullptr; else - current_roles = std::make_shared>(current_roles_); + current_roles = std::make_shared>(new_current_roles); need_recalculate_access = true; } -void Context::setCurrentRoles(const std::vector & current_roles_) +void Context::setCurrentRolesImpl(const std::vector & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr & user) { - std::lock_guard lock(mutex); - setCurrentRolesWithLock(current_roles_, lock); + if (skip_if_not_granted) + { + auto filtered_role_ids = user->granted_roles.findGranted(new_current_roles); + std::lock_guard lock{mutex}; + setCurrentRolesWithLock(filtered_role_ids, lock); + return; + } + if (throw_if_not_granted) + { + for (const auto & role_id : new_current_roles) + { + if (!user->granted_roles.isGranted(role_id)) + { + auto role_name = getAccessControl().tryReadName(role_id); + throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role {} should be granted to set as a current", role_name.value_or(toString(role_id))); + } + } + } + std::lock_guard lock2{mutex}; + setCurrentRolesWithLock(new_current_roles, lock2); +} + +void Context::setCurrentRoles(const std::vector & new_current_roles, bool check_grants) +{ + setCurrentRolesImpl(new_current_roles, /* throw_if_not_granted= */ check_grants, /* skip_if_not_granted= */ !check_grants, getUser()); +} + +void Context::setCurrentRoles(const RolesOrUsersSet & new_current_roles, bool check_grants) +{ + if (new_current_roles.all) + { + auto user = getUser(); + setCurrentRolesImpl(user->granted_roles.findGranted(new_current_roles), /* throw_if_not_granted= */ false, /* skip_if_not_granted= */ false, user); + } + else + { + setCurrentRoles(new_current_roles.getMatchingIDs(), check_grants); + } +} + +void Context::setCurrentRoles(const Strings & new_current_roles, bool check_grants) +{ + setCurrentRoles(getAccessControl().getIDs(new_current_roles), check_grants); } void Context::setCurrentRolesDefault() { auto user = getUser(); - setCurrentRoles(user->granted_roles.findGranted(user->default_roles)); + setCurrentRolesImpl(user->granted_roles.findGranted(user->default_roles), /* throw_if_not_granted= */ false, /* skip_if_not_granted= */ false, user); } std::vector Context::getCurrentRoles() const @@ -2238,6 +2281,12 @@ void Context::setSetting(std::string_view name, const Field & value) contextSanityClampSettingsWithLock(*this, settings, lock); } +void Context::setServerSetting(std::string_view name, const Field & value) +{ + std::lock_guard lock(mutex); + shared->server_settings.set(name, value); +} + void Context::applySettingChange(const SettingChange & change) { try @@ -4002,7 +4051,6 @@ std::shared_ptr Context::getQueryThreadLog() const std::shared_ptr Context::getQueryViewsLog() const { SharedLockGuard lock(shared->mutex); - if (!shared->system_logs) return {}; @@ -5469,10 +5517,37 @@ bool Context::canUseParallelReplicasOnFollower() const return canUseTaskBasedParallelReplicas() && getClientInfo().collaborate_with_initiator; } -bool Context::canUseParallelReplicasCustomKey(const Cluster & cluster) const +bool Context::canUseParallelReplicasCustomKey() const { - return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY - && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; + return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY; +} + +bool Context::canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const +{ + return canUseParallelReplicasCustomKey() && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; +} + +bool Context::canUseOffsetParallelReplicas() const +{ + return offset_parallel_replicas_enabled && settings.max_parallel_replicas > 1 + && getParallelReplicasMode() != Context::ParallelReplicasMode::READ_TASKS; +} + +void Context::disableOffsetParallelReplicas() +{ + offset_parallel_replicas_enabled = false; +} + +ClusterPtr Context::getClusterForParallelReplicas() const +{ + /// check cluster for parallel replicas + if (settings.cluster_for_parallel_replicas.value.empty()) + throw Exception( + ErrorCodes::CLUSTER_DOESNT_EXIST, + "Reading in parallel from replicas is enabled but cluster to execute query is not provided. Please set " + "'cluster_for_parallel_replicas' setting"); + + return getCluster(settings.cluster_for_parallel_replicas); } void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index d3f152b7a67..8c5492bcbc8 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -61,6 +61,7 @@ class AccessFlags; struct AccessRightsElement; class AccessRightsElements; enum class RowPolicyFilterType : uint8_t; +struct RolesOrUsersSet; class EmbeddedDictionaries; class ExternalDictionariesLoader; class ExternalUserDefinedExecutableFunctionsLoader; @@ -150,6 +151,8 @@ class AsyncLoader; struct TemporaryTableHolder; using TemporaryTablesMapping = std::map>; +using ClusterPtr = std::shared_ptr; + class LoadTask; using LoadTaskPtr = std::shared_ptr; using LoadTaskPtrs = std::vector; @@ -457,6 +460,11 @@ protected: /// mutation tasks of one mutation executed against different parts of the same table. PreparedSetsCachePtr prepared_sets_cache; + /// this is a mode of parallel replicas where we set parallel_replicas_count and parallel_replicas_offset + /// and generate specific filters on the replicas (e.g. when using parallel replicas with sample key) + /// if we already use a different mode of parallel replicas we want to disable this mode + bool offset_parallel_replicas_enabled = true; + public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. @@ -600,13 +608,15 @@ public: /// Sets the current user assuming that he/she is already authenticated. /// WARNING: This function doesn't check password! - void setUser(const UUID & user_id_, const std::optional> & current_roles_ = {}); + void setUser(const UUID & user_id_); UserPtr getUser() const; std::optional getUserID() const; String getUserName() const; - void setCurrentRoles(const std::vector & current_roles_); + void setCurrentRoles(const Strings & new_current_roles, bool check_grants = true); + void setCurrentRoles(const std::vector & new_current_roles, bool check_grants = true); + void setCurrentRoles(const RolesOrUsersSet & new_current_roles, bool check_grants = true); void setCurrentRolesDefault(); std::vector getCurrentRoles() const; std::vector getEnabledRoles() const; @@ -823,6 +833,7 @@ public: /// Set settings by name. void setSetting(std::string_view name, const String & value); void setSetting(std::string_view name, const Field & value); + void setServerSetting(std::string_view name, const Field & value); void applySettingChange(const SettingChange & change); void applySettingsChanges(const SettingsChanges & changes); @@ -1308,7 +1319,13 @@ public: bool canUseTaskBasedParallelReplicas() const; bool canUseParallelReplicasOnInitiator() const; bool canUseParallelReplicasOnFollower() const; - bool canUseParallelReplicasCustomKey(const Cluster & cluster) const; + bool canUseParallelReplicasCustomKey() const; + bool canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const; + bool canUseOffsetParallelReplicas() const; + + void disableOffsetParallelReplicas(); + + ClusterPtr getClusterForParallelReplicas() const; enum class ParallelReplicasMode : uint8_t { @@ -1333,7 +1350,7 @@ private: void setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard & lock); - void setCurrentRolesWithLock(const std::vector & current_roles_, const std::lock_guard & lock); + void setCurrentRolesWithLock(const std::vector & new_current_roles, const std::lock_guard & lock); void setSettingWithLock(std::string_view name, const String & value, const std::lock_guard & lock); @@ -1366,6 +1383,7 @@ private: void initGlobal(); void setUserID(const UUID & user_id_); + void setCurrentRolesImpl(const std::vector & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr & user); template void checkAccessImpl(const Args &... args) const; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 841decf29c5..964baea1891 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1,5 +1,7 @@ +#include #include #include +#include #include #include #include @@ -26,7 +28,9 @@ #include #include #include +#include +#include #include #include "config.h" @@ -190,6 +194,7 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase() unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec); unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec); drop_error_cooldown_sec = getContext()->getConfigRef().getInt64("database_catalog_drop_error_cooldown_sec", drop_error_cooldown_sec); + drop_table_concurrency = getContext()->getConfigRef().getInt64("database_catalog_drop_table_concurrency", drop_table_concurrency); auto db_for_temporary_and_external_tables = std::make_shared(TEMPORARY_DATABASE, getContext()); attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables); @@ -1143,7 +1148,7 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr (*drop_task)->schedule(); } -void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) +void DatabaseCatalog::undropTable(StorageID table_id) { String latest_metadata_dropped_path; TableMarkedAsDropped dropped_table; @@ -1197,7 +1202,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) /// It's unsafe to create another instance while the old one exists /// We cannot wait on shared_ptr's refcount, so it's busy wait - while (!dropped_table.table.unique()) + while (!isSharedPtrUnique(dropped_table.table)) std::this_thread::sleep_for(std::chrono::milliseconds(100)); dropped_table.table.reset(); @@ -1218,91 +1223,166 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) LOG_INFO(log, "Table {} was successfully undropped.", dropped_table.table_id.getNameForLogs()); } +std::tuple DatabaseCatalog::getDroppedTablesCountAndInuseCount() +{ + std::lock_guard lock(tables_marked_dropped_mutex); + + size_t in_use_count = 0; + for (const auto & item : tables_marked_dropped) + { + bool in_use = item.table && !isSharedPtrUnique(item.table); + in_use_count += in_use; + } + return {tables_marked_dropped.size(), in_use_count}; +} + +time_t DatabaseCatalog::getMinDropTime() +{ + time_t min_drop_time = std::numeric_limits::max(); + for (const auto & item : tables_marked_dropped) + { + min_drop_time = std::min(min_drop_time, item.drop_time); + } + return min_drop_time; +} + +std::vector DatabaseCatalog::getTablesToDrop() +{ + time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + decltype(getTablesToDrop()) result; + + std::lock_guard lock(tables_marked_dropped_mutex); + + for (auto it = tables_marked_dropped.begin(); it != tables_marked_dropped.end(); ++it) + { + bool in_use = it->table && !isSharedPtrUnique(it->table); + bool old_enough = it->drop_time <= current_time; + if (!in_use && old_enough) + result.emplace_back(it); + } + + return result; +} + +void DatabaseCatalog::rescheduleDropTableTask() +{ + std::lock_guard lock(tables_marked_dropped_mutex); + + if (tables_marked_dropped.empty()) + return; + + if (first_async_drop_in_queue != tables_marked_dropped.begin()) + { + (*drop_task)->scheduleAfter(0); + return; + } + + time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + auto min_drop_time = getMinDropTime(); + time_t schedule_after_ms = min_drop_time > current_time ? (min_drop_time - current_time) * 1000 : 0; + + LOG_TRACE( + log, + "Have {} tables in queue to drop. Schedule background task in {} seconds", + tables_marked_dropped.size(), schedule_after_ms / 1000); + (*drop_task)->scheduleAfter(schedule_after_ms); +} + +void DatabaseCatalog::dropTablesParallel(std::vector tables_to_drop) +{ + if (tables_to_drop.empty()) + return; + + ThreadPool pool( + CurrentMetrics::DatabaseCatalogThreads, + CurrentMetrics::DatabaseCatalogThreadsActive, + CurrentMetrics::DatabaseCatalogThreadsScheduled, + /* max_threads */drop_table_concurrency, + /* max_free_threads */0, + /* queue_size */tables_to_drop.size()); + + for (const auto & item : tables_to_drop) + { + auto job = [&, table_iterator = item] () + { + try + { + dropTableFinally(*table_iterator); + + TableMarkedAsDropped table_to_delete_without_lock; + { + std::lock_guard lock(tables_marked_dropped_mutex); + + if (first_async_drop_in_queue == table_iterator) + ++first_async_drop_in_queue; + + [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(table_iterator->table_id.uuid); + chassert(removed); + + table_to_delete_without_lock = std::move(*table_iterator); + tables_marked_dropped.erase(table_iterator); + + wait_table_finally_dropped.notify_all(); + } + } + catch (...) + { + tryLogCurrentException(log, "Cannot drop table " + table_iterator->table_id.getNameForLogs() + + ". Will retry later."); + { + std::lock_guard lock(tables_marked_dropped_mutex); + + if (first_async_drop_in_queue == table_iterator) + ++first_async_drop_in_queue; + + tables_marked_dropped.splice(tables_marked_dropped.end(), tables_marked_dropped, table_iterator); + table_iterator->drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; + + if (first_async_drop_in_queue == tables_marked_dropped.end()) + --first_async_drop_in_queue; + } + } + }; + + try + { + pool.scheduleOrThrowOnError(std::move(job)); + } + catch (...) + { + tryLogCurrentException(log, "Cannot drop tables. Will retry later."); + break; + } + } + + try + { + pool.wait(); + } + catch (...) + { + tryLogCurrentException(log, "Cannot drop tables. Will retry later."); + } +} + void DatabaseCatalog::dropTableDataTask() { /// Background task that removes data of tables which were marked as dropped by Atomic databases. /// Table can be removed when it's not used by queries and drop_delay_sec elapsed since it was marked as dropped. - bool need_reschedule = true; - /// Default reschedule time for the case when we are waiting for reference count to become 1. - size_t schedule_after_ms = reschedule_time_ms; - TableMarkedAsDropped table; - try + auto [drop_tables_count, drop_tables_in_use_count] = getDroppedTablesCountAndInuseCount(); + + auto tables_to_drop = getTablesToDrop(); + + if (!tables_to_drop.empty()) { - std::lock_guard lock(tables_marked_dropped_mutex); - if (tables_marked_dropped.empty()) - return; - time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - time_t min_drop_time = std::numeric_limits::max(); - size_t tables_in_use_count = 0; - auto it = std::find_if(tables_marked_dropped.begin(), tables_marked_dropped.end(), [&](const auto & elem) - { - bool not_in_use = !elem.table || elem.table.unique(); - bool old_enough = elem.drop_time <= current_time; - min_drop_time = std::min(min_drop_time, elem.drop_time); - tables_in_use_count += !not_in_use; - return not_in_use && old_enough; - }); - if (it != tables_marked_dropped.end()) - { - table = std::move(*it); - LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {}", - tables_marked_dropped.size(), tables_in_use_count, table.table_id.getNameForLogs()); - if (first_async_drop_in_queue == it) - ++first_async_drop_in_queue; - tables_marked_dropped.erase(it); - /// Schedule the task as soon as possible, while there are suitable tables to drop. - schedule_after_ms = 0; - } - else if (current_time < min_drop_time) - { - /// We are waiting for drop_delay_sec to exceed, no sense to wakeup until min_drop_time. - /// If new table is added to the queue with ignore_delay flag, schedule() is called to wakeup the task earlier. - schedule_after_ms = (min_drop_time - current_time) * 1000; - LOG_TRACE(log, "Not found any suitable tables to drop, still have {} tables in drop queue ({} of them are in use). " - "Will check again after {} seconds", tables_marked_dropped.size(), tables_in_use_count, min_drop_time - current_time); - } - need_reschedule = !tables_marked_dropped.empty(); - } - catch (...) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); + LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {} tables", + drop_tables_count, drop_tables_in_use_count, tables_to_drop.size()); + + dropTablesParallel(tables_to_drop); } - if (table.table_id) - { - try - { - dropTableFinally(table); - std::lock_guard lock(tables_marked_dropped_mutex); - [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(table.table_id.uuid); - assert(removed); - } - catch (...) - { - tryLogCurrentException(log, "Cannot drop table " + table.table_id.getNameForLogs() + - ". Will retry later."); - { - table.drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; - std::lock_guard lock(tables_marked_dropped_mutex); - tables_marked_dropped.emplace_back(std::move(table)); - if (first_async_drop_in_queue == tables_marked_dropped.end()) - --first_async_drop_in_queue; - /// If list of dropped tables was empty, schedule a task to retry deletion. - if (tables_marked_dropped.size() == 1) - { - need_reschedule = true; - schedule_after_ms = drop_error_cooldown_sec * 1000; - } - } - } - - wait_table_finally_dropped.notify_all(); - } - - /// Do not schedule a task if there is no tables to drop - if (need_reschedule) - (*drop_task)->scheduleAfter(schedule_after_ms); + rescheduleDropTableTask(); } void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 17d34e96245..23e38a6445e 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -225,7 +225,7 @@ public: String getPathForDroppedMetadata(const StorageID & table_id) const; String getPathForMetadata(const StorageID & table_id) const; void enqueueDroppedTableCleanup(StorageID table_id, StoragePtr table, String dropped_metadata_path, bool ignore_delay = false); - void dequeueDroppedTableCleanup(StorageID table_id); + void undropTable(StorageID table_id); void waitTableFinallyDropped(const UUID & uuid); @@ -296,6 +296,12 @@ private: void dropTableDataTask(); void dropTableFinally(const TableMarkedAsDropped & table); + time_t getMinDropTime() TSA_REQUIRES(tables_marked_dropped_mutex); + std::tuple getDroppedTablesCountAndInuseCount(); + std::vector getTablesToDrop(); + void dropTablesParallel(std::vector tables); + void rescheduleDropTableTask(); + void cleanupStoreDirectoryTask(); bool maybeRemoveDirectory(const String & disk_name, const DiskPtr & disk, const String & unused_dir); @@ -363,6 +369,9 @@ private: static constexpr time_t default_drop_error_cooldown_sec = 5; time_t drop_error_cooldown_sec = default_drop_error_cooldown_sec; + static constexpr size_t default_drop_table_concurrency = 10; + size_t drop_table_concurrency = default_drop_table_concurrency; + std::unique_ptr reload_disks_task; std::mutex reload_disks_mutex; std::set disks_to_reload; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 8e9ff9ed46c..16d0eb71278 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -986,7 +986,8 @@ static std::shared_ptr tryCreateJoin( const auto & settings = context->getSettings(); if (analyzed_join->allowParallelHashJoin()) - return std::make_shared(context, analyzed_join, settings.max_threads, right_sample_block); + return std::make_shared( + context, analyzed_join, settings.max_threads, right_sample_block, StatsCollectingParams{}); return std::make_shared(analyzed_join, right_sample_block); } diff --git a/src/Interpreters/ExternalLoader.h b/src/Interpreters/ExternalLoader.h index 49b5e68d821..6356a174a01 100644 --- a/src/Interpreters/ExternalLoader.h +++ b/src/Interpreters/ExternalLoader.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/HashTablesStatistics.cpp b/src/Interpreters/HashTablesStatistics.cpp new file mode 100644 index 00000000000..d66f1bbd1d3 --- /dev/null +++ b/src/Interpreters/HashTablesStatistics.cpp @@ -0,0 +1,117 @@ +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +std::optional HashTablesStatistics::getSizeHint(const Params & params) +{ + if (!params.isCollectionAndUseEnabled()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + + std::lock_guard lock(mutex); + const auto cache = getHashTableStatsCache(params, lock); + if (const auto hint = cache->get(params.key)) + { + LOG_TRACE( + getLogger("HashTablesStatistics"), + "An entry for key={} found in cache: sum_of_sizes={}, median_size={}", + params.key, + hint->sum_of_sizes, + hint->median_size); + return *hint; + } + return std::nullopt; +} + +/// Collection and use of the statistics should be enabled. +void HashTablesStatistics::update(size_t sum_of_sizes, size_t median_size, const Params & params) +{ + if (!params.isCollectionAndUseEnabled()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + + std::lock_guard lock(mutex); + const auto cache = getHashTableStatsCache(params, lock); + const auto hint = cache->get(params.key); + // We'll maintain the maximum among all the observed values until another prediction is much lower (that should indicate some change) + if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2 + || hint->median_size < median_size) + { + LOG_TRACE( + getLogger("HashTablesStatistics"), + "Statistics updated for key={}: new sum_of_sizes={}, median_size={}", + params.key, + sum_of_sizes, + median_size); + cache->set(params.key, std::make_shared(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size})); + } +} + +std::optional HashTablesStatistics::getCacheStats() const +{ + std::lock_guard lock(mutex); + if (hash_table_stats) + { + size_t hits = 0, misses = 0; + hash_table_stats->getStats(hits, misses); + return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses}; + } + return std::nullopt; +} + +HashTablesStatistics::CachePtr HashTablesStatistics::getHashTableStatsCache(const Params & params, const std::lock_guard &) +{ + if (!hash_table_stats) + hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats * sizeof(Entry)); + return hash_table_stats; +} + +HashTablesStatistics & getHashTablesStatistics() +{ + static HashTablesStatistics hash_tables_stats; + return hash_tables_stats; +} + +std::optional getHashTablesCacheStatistics() +{ + return getHashTablesStatistics().getCacheStats(); +} + +std::optional getSizeHint(const DB::StatsCollectingParams & stats_collecting_params, size_t tables_cnt) +{ + if (stats_collecting_params.isCollectionAndUseEnabled()) + { + if (auto hint = DB::getHashTablesStatistics().getSizeHint(stats_collecting_params)) + { + const auto lower_limit = hint->sum_of_sizes / tables_cnt; + const auto upper_limit = stats_collecting_params.max_size_to_preallocate / tables_cnt; + if (hint->median_size > upper_limit) + { + /// Since we cannot afford to preallocate as much as needed, we would likely have to do at least one resize anyway. + /// Though it still sounds better than N resizes, but in actuality we saw that one big resize (remember, HT-s grow exponentially) + /// plus worse cache locality since we're dealing with big HT-s from the beginning yields worse performance. + /// So let's just do nothing. + LOG_TRACE( + getLogger("HashTablesStatistics"), + "No space were preallocated in hash tables because 'max_size_to_preallocate' has too small value: {}, " + "should be at least {}", + stats_collecting_params.max_size_to_preallocate, + hint->median_size * tables_cnt); + } + /// https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703 + else if ((tables_cnt > 1 && hint->sum_of_sizes > 100'000) || hint->sum_of_sizes > 500'000) + { + return HashTablesStatistics::Entry{hint->sum_of_sizes, std::max(lower_limit, hint->median_size)}; + } + } + } + return std::nullopt; +} +} diff --git a/src/Interpreters/HashTablesStatistics.h b/src/Interpreters/HashTablesStatistics.h new file mode 100644 index 00000000000..7b4c4fcbfeb --- /dev/null +++ b/src/Interpreters/HashTablesStatistics.h @@ -0,0 +1,69 @@ +#pragma once + +#include + +namespace DB +{ + +struct HashTablesCacheStatistics +{ + size_t entries = 0; + size_t hits = 0; + size_t misses = 0; +}; + +struct StatsCollectingParams +{ + StatsCollectingParams() = default; + + StatsCollectingParams(UInt64 key_, bool enable_, size_t max_entries_for_hash_table_stats_, size_t max_size_to_preallocate_) + : key(enable_ ? key_ : 0) + , max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_) + , max_size_to_preallocate(max_size_to_preallocate_) + { + } + + bool isCollectionAndUseEnabled() const { return key != 0; } + void disable() { key = 0; } + + UInt64 key = 0; + const size_t max_entries_for_hash_table_stats = 0; + const size_t max_size_to_preallocate = 0; +}; + +/** Collects observed HashTable-s sizes to avoid redundant intermediate resizes. + */ +class HashTablesStatistics +{ +public: + struct Entry + { + size_t sum_of_sizes; // used to determine if it's better to convert aggregation to two-level from the beginning + size_t median_size; // roughly the size we're going to preallocate on each thread + }; + + using Cache = DB::CacheBase; + using CachePtr = std::shared_ptr; + using Params = StatsCollectingParams; + + /// Collection and use of the statistics should be enabled. + std::optional getSizeHint(const Params & params); + + /// Collection and use of the statistics should be enabled. + void update(size_t sum_of_sizes, size_t median_size, const Params & params); + + std::optional getCacheStats() const; + +private: + CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard &); + + mutable std::mutex mutex; + CachePtr hash_table_stats; +}; + +HashTablesStatistics & getHashTablesStatistics(); + +std::optional getHashTablesCacheStatistics(); + +std::optional getSizeHint(const DB::StatsCollectingParams & stats_collecting_params, size_t tables_cnt); +} diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp index 79a17fd1844..0e83e2039f6 100644 --- a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterAlterNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::ALTER_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().updateFromSQL(query); diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 81bb6290acb..4a84a7bf570 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -2,7 +2,6 @@ #include #include -#include #include @@ -23,7 +22,6 @@ #include #include -#include #include #include #include @@ -93,7 +91,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con return Chunk(std::move(columns), 1); } -class TableCheckTask : public ChunkInfoCloneable +class TableCheckTask : public ChunkInfo { public: TableCheckTask(StorageID table_id, const std::variant & partition_or_part, ContextPtr context) @@ -112,12 +110,6 @@ public: context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID()); } - TableCheckTask(const TableCheckTask & other) - : table(other.table) - , check_data_tasks(other.check_data_tasks) - , is_finished(other.is_finished.load()) - {} - std::optional checkNext() const { if (isFinished()) @@ -129,8 +121,8 @@ public: std::this_thread::sleep_for(sleep_time); }); - IStorage::DataValidationTasksPtr tmp = check_data_tasks; - auto result = table->checkDataNext(tmp); + IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks; + auto result = table->checkDataNext(check_data_tasks_); is_finished = !result.has_value(); return result; } @@ -188,7 +180,7 @@ protected: /// source should return at least one row to start pipeline result.addColumn(ColumnUInt8::create(1, 1)); /// actual data stored in chunk info - result.getChunkInfos().add(std::move(current_check_task)); + result.setChunkInfo(std::move(current_check_task)); return result; } @@ -288,7 +280,7 @@ public: protected: void transform(Chunk & chunk) override { - auto table_check_task = chunk.getChunkInfos().get(); + auto table_check_task = std::dynamic_pointer_cast(chunk.getChunkInfo()); auto check_result = table_check_task->checkNext(); if (!check_result) { diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index c71441daa8c..b4920b1729f 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterCreateNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::CREATE_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().createFromSQL(query); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ee191c02ff8..0ee2bb6c0e9 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1776,13 +1776,8 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) else insert->select = create.select->clone(); - return InterpreterInsertQuery( - insert, - getContext(), - getContext()->getSettingsRef().insert_allow_materialized_columns, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false).execute(); + return InterpreterInsertQuery(insert, getContext(), + getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); } return {}; diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index 2edaef1b2f2..6233d21b439 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterDropNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::DROP_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().removeFromSQL(query); diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 26b7e074fdf..7c7b4b3f95a 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -534,13 +534,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert( - ast.getExplainedQuery(), - getContext(), - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2cbfc55d008..f396db70d21 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -40,7 +38,6 @@ #include #include #include -#include "base/defines.h" namespace ProfileEvents @@ -397,358 +394,28 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } -std::pair, std::vector> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) -{ - chassert(presink_streams > 0); - chassert(sink_streams > 0); - - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(getContext()); - - std::vector sink_chains; - std::vector presink_chains; - - for (size_t i = 0; i < sink_streams; ++i) - { - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, - running_group, /* elapsed_counter_ms= */ nullptr); - - sink_chains.emplace_back(std::move(out)); - } - - for (size_t i = 0; i < presink_streams; ++i) - { - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); - presink_chains.emplace_back(std::move(out)); - } - - return {std::move(presink_chains), std::move(sink_chains)}; -} - - -QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table) -{ - const Settings & settings = getContext()->getSettingsRef(); - - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); - - bool is_trivial_insert_select = false; - - if (settings.optimize_trivial_insert_select) - { - const auto & select_query = query.select->as(); - const auto & selects = select_query.list_of_selects->children; - const auto & union_modes = select_query.list_of_modes; - - /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries - const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; - - is_trivial_insert_select = - std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) - && std::all_of(selects.begin(), selects.end(), isTrivialSelect); - } - - ContextPtr select_context = getContext(); - - if (is_trivial_insert_select) - { - /** When doing trivial INSERT INTO ... SELECT ... FROM table, - * don't need to process SELECT with more than max_insert_threads - * and it's reasonable to set block size for SELECT to the desired block size for INSERT - * to avoid unnecessary squashing. - */ - - Settings new_settings = select_context->getSettings(); - - new_settings.max_threads = std::max(1, settings.max_insert_threads); - - if (table->prefersLargeBlocks()) - { - if (settings.min_insert_block_size_rows) - new_settings.max_block_size = settings.min_insert_block_size_rows; - if (settings.min_insert_block_size_bytes) - new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; - } - - auto context_for_trivial_select = Context::createCopy(context); - context_for_trivial_select->setSettings(new_settings); - context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); - - select_context = context_for_trivial_select; - } - - QueryPipelineBuilder pipeline; - - { - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - - pipeline.dropTotalsAndExtremes(); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. - if (getContext()->getSettingsRef().insert_null_as_default) - { - const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); - const auto & output_columns = metadata_snapshot->getColumns(); - - if (input_columns.size() == query_columns.size()) - { - for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) - { - /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with - /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) - && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) - && !isVariant(query_columns[col_idx].type) - && !isDynamic(query_columns[col_idx].type) - && output_columns.has(query_columns[col_idx].name)) - { - query_sample_block.setColumn( - col_idx, - ColumnWithTypeAndName( - makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), - makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), - query_columns[col_idx].name)); - } - } - } - } - - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - query_sample_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - - size_t num_select_threads = pipeline.getNumThreads(); - - pipeline.resize(1); - - if (shouldAddSquashingFroStorage(table)) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, - table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - if (!settings.insert_deduplication_token.value.empty()) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(settings.insert_deduplication_token.value, in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - } - - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. - - size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - - size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; - - if (!settings.parallel_view_processing) - { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); - - if (table->isView() || !views.empty()) - sink_streams_size = 1; - } - - auto [presink_chains, sink_chains] = buildPreAndSinkChains( - presink_streams_size, sink_streams_size, - table, metadata_snapshot, query_sample_block); - - pipeline.resize(presink_chains.size()); - - if (shouldAddSquashingFroStorage(table)) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, - table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - for (auto & chain : presink_chains) - pipeline.addResources(chain.detachResources()); - pipeline.addChains(std::move(presink_chains)); - - pipeline.resize(sink_streams_size); - - for (auto & chain : sink_chains) - pipeline.addResources(chain.detachResources()); - pipeline.addChains(std::move(sink_chains)); - - if (!settings.parallel_view_processing) - { - /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. - if (pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); - } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. - pipeline.setMaxThreads(settings.max_threads); - } - - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - return QueryPipelineBuilder::getPipeline(std::move(pipeline)); -} - - -QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table) -{ - const Settings & settings = getContext()->getSettingsRef(); - - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); - - Chain chain; - - { - auto [presink_chains, sink_chains] = buildPreAndSinkChains( - /* presink_streams */1, /* sink_streams */1, - table, metadata_snapshot, query_sample_block); - - chain = std::move(presink_chains.front()); - chain.appendChain(std::move(sink_chains.front())); - } - - if (!settings.insert_deduplication_token.value.empty()) - { - chain.addSource(std::make_shared(chain.getInputHeader())); - chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); - } - - chain.addSource(std::make_shared(chain.getInputHeader())); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - - auto balancing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(balancing)); - } - - auto context_ptr = getContext(); - auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - chain.addSource(std::move(counting)); - - QueryPipeline pipeline = QueryPipeline(std::move(chain)); - - pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); - pipeline.setConcurrencyControl(settings.use_concurrency_control); - - if (query.hasInlinedData() && !async_insert) - { - /// can execute without additional data - auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); - for (auto && buffer : owned_buffers) - format->addBuffer(std::move(buffer)); - - auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); - pipeline.complete(std::move(pipe)); - } - - return pipeline; -} - - BlockIO InterpreterInsertQuery::execute() { const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); + QueryPipelineBuilder pipeline; + std::optional distributed_pipeline; + QueryPlanResourceHolder resources; StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + StoragePtr inner_table; + if (const auto * mv = dynamic_cast(table.get())) + inner_table = mv->getTargetTable(); + if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); /// For table functions we check access while executing @@ -756,43 +423,320 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (!allow_materialized) + if (query.select && settings.parallel_distributed_insert_select) + // Distributed INSERT SELECT + distributed_pipeline = table->distributedWrite(query, getContext()); + + std::vector presink_chains; + std::vector sink_chains; + if (!distributed_pipeline) { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use the same streams for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + /// * If it's not an INSERT SELECT, forget all that and use one stream. + size_t pre_streams_size = 1; + size_t sink_streams_size = 1; + + if (query.select) + { + bool is_trivial_insert_select = false; + + if (settings.optimize_trivial_insert_select) + { + const auto & select_query = query.select->as(); + const auto & selects = select_query.list_of_selects->children; + const auto & union_modes = select_query.list_of_modes; + + /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries + const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; + + is_trivial_insert_select = + std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) + && std::all_of(selects.begin(), selects.end(), isTrivialSelect); + } + + if (is_trivial_insert_select) + { + /** When doing trivial INSERT INTO ... SELECT ... FROM table, + * don't need to process SELECT with more than max_insert_threads + * and it's reasonable to set block size for SELECT to the desired block size for INSERT + * to avoid unnecessary squashing. + */ + + Settings new_settings = getContext()->getSettings(); + + new_settings.max_threads = std::max(1, settings.max_insert_threads); + + if (table->prefersLargeBlocks()) + { + if (settings.min_insert_block_size_rows) + new_settings.max_block_size = settings.min_insert_block_size_rows; + if (settings.min_insert_block_size_bytes) + new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; + } + + auto new_context = Context::createCopy(context); + new_context->setSettings(new_settings); + new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); + + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + else + { + /// Passing 1 as subquery_depth will disable limiting size of intermediate result. + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + + pipeline.dropTotalsAndExtremes(); + + if (settings.max_insert_threads > 1) + { + auto table_id = table->getStorageID(); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); + + /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. + /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. + const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); + pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads + : std::min(settings.max_insert_threads, pipeline.getNumStreams()); + + /// Deduplication when passing insert_deduplication_token breaks if using more than one thread + if (!settings.insert_deduplication_token.toString().empty()) + { + LOG_DEBUG( + getLogger("InsertQuery"), + "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); + pre_streams_size = 1; + } + + if (table->supportsParallelInsert()) + sink_streams_size = pre_streams_size; + } + + pipeline.resize(pre_streams_size); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. + if (getContext()->getSettingsRef().insert_null_as_default) + { + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); + const auto & output_columns = metadata_snapshot->getColumns(); + + if (input_columns.size() == query_columns.size()) + { + for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) + { + /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with + /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) + query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); + } + } + } + } + + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); + for (size_t i = 0; i < sink_streams_size; ++i) + { + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, + running_group, /* elapsed_counter_ms= */ nullptr); + sink_chains.emplace_back(std::move(out)); + } + for (size_t i = 0; i < pre_streams_size; ++i) + { + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); + presink_chains.emplace_back(std::move(out)); + } } BlockIO res; - if (query.select) + /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? + if (distributed_pipeline) { - if (settings.parallel_distributed_insert_select) + res.pipeline = std::move(*distributed_pipeline); + } + else if (query.select) + { + const auto & header = presink_chains.at(0).getInputHeader(); + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - auto distributed = table->distributedWrite(query, getContext()); - if (distributed) - { - res.pipeline = std::move(*distributed); - } - else - { - res.pipeline = buildInsertSelectPipeline(query, table); - } - } - else + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - res.pipeline = buildInsertSelectPipeline(query, table); + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + size_t threads = presink_chains.size(); + + pipeline.resize(1); + + pipeline.addTransform(std::make_shared( + header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + + pipeline.resize(threads); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); } + + size_t num_select_threads = pipeline.getNumThreads(); + + for (auto & chain : presink_chains) + resources = chain.detachResources(); + for (auto & chain : sink_chains) + resources = chain.detachResources(); + + pipeline.addChains(std::move(presink_chains)); + pipeline.resize(sink_chains.size()); + pipeline.addChains(std::move(sink_chains)); + + if (!settings.parallel_view_processing) + { + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + else if (pipeline.getNumThreads() < settings.max_threads) + { + /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, + /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. + /// + /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. + pipeline.setMaxThreads(settings.max_threads); + } + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + if (!allow_materialized) + { + for (const auto & column : metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + } + + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); } else { - res.pipeline = buildInsertPipeline(query, table); + auto & chain = presink_chains.at(0); + chain.appendChain(std::move(sink_chains.at(0))); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(balancing)); + } + + auto context_ptr = getContext(); + auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + chain.addSource(std::move(counting)); + + res.pipeline = QueryPipeline(std::move(presink_chains[0])); + res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); + res.pipeline.setConcurrencyControl(settings.use_concurrency_control); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); + res.pipeline.complete(std::move(pipe)); + } } - res.pipeline.addStorageHolder(table); + res.pipeline.addResources(std::move(resources)); - if (const auto * mv = dynamic_cast(table.get())) - res.pipeline.addStorageHolder(mv->getTargetTable()); + res.pipeline.addStorageHolder(table); + if (inner_table) + res.pipeline.addStorageHolder(inner_table); return res; } @@ -813,27 +757,17 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont } } - void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const { extendQueryLogElemImpl(elem, context_); } - void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique( - args.query, - args.context, - args.allow_materialized, - /* no_squash */false, - /* no_destination */false, - /* async_insert */false); + return std::make_unique(args.query, args.context, args.allow_materialized); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } - - } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 894c7c42144..bf73fb2a319 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -23,10 +23,10 @@ public: InterpreterInsertQuery( const ASTPtr & query_ptr_, ContextPtr context_, - bool allow_materialized_, - bool no_squash_, - bool no_destination, - bool async_insert_); + bool allow_materialized_ = false, + bool no_squash_ = false, + bool no_destination_ = false, + bool async_insert_ = false); /** Prepare a request for execution. Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -73,17 +73,12 @@ private: ASTPtr query_ptr; const bool allow_materialized; - bool no_squash = false; - bool no_destination = false; + const bool no_squash; + const bool no_destination; const bool async_insert; std::vector> owned_buffers; - std::pair, std::vector> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); - - QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); - QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); - Chain buildSink( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 90c484636ea..f13710e9ad5 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -85,16 +85,17 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #include #include #include -#include -#include namespace ProfileEvents @@ -565,7 +566,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); ASTPtr parallel_replicas_custom_filter_ast = nullptr; - if (storage && context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY && !joined_tables.tablesWithColumns().empty()) + if (storage && context->canUseParallelReplicasCustomKey() && !joined_tables.tablesWithColumns().empty()) { if (settings.parallel_replicas_count > 1) { @@ -586,16 +587,28 @@ InterpreterSelectQuery::InterpreterSelectQuery( else if (settings.parallel_replica_offset > 0) { throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Parallel replicas processing with custom_key has been requested " - "(setting 'max_parallel_replicas') but the table does not have custom_key defined for it " - "or it's invalid (settings `parallel_replicas_custom_key`)"); + ErrorCodes::BAD_ARGUMENTS, + "Parallel replicas processing with custom_key has been requested " + "(setting 'max_parallel_replicas') but the table does not have custom_key defined for it " + "or it's invalid (settings `parallel_replicas_custom_key`)"); } } + /// We disable prefer_localhost_replica because if one of the replicas is local it will create a single local plan + /// instead of executing the query with multiple replicas + /// We can enable this setting again for custom key parallel replicas when we can generate a plan that will use both a + /// local plan and remote replicas else if (auto * distributed = dynamic_cast(storage.get()); - distributed && context->canUseParallelReplicasCustomKey(*distributed->getCluster())) + distributed && context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) { context->setSetting("distributed_group_by_no_merge", 2); + context->setSetting("prefer_localhost_replica", Field(0)); + } + else if ( + storage->isMergeTree() && (storage->supportsReplication() || settings.parallel_replicas_for_non_replicated_merge_tree) + && context->getClientInfo().distributed_depth == 0 + && context->canUseParallelReplicasCustomKeyForCluster(*context->getClusterForParallelReplicas())) + { + context->setSetting("prefer_localhost_replica", Field(0)); } } @@ -2603,10 +2616,10 @@ static Aggregator::Params getAggregatorParams( size_t group_by_two_level_threshold, size_t group_by_two_level_threshold_bytes) { - const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( - query_ptr, + const auto stats_collecting_params = StatsCollectingParams( + calculateCacheKey(query_ptr), settings.collect_hash_table_stats_during_aggregation, - settings.max_entries_for_hash_table_stats, + context.getServerSettings().max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_aggregation); return Aggregator::Params diff --git a/src/Interpreters/InterpreterUndropQuery.cpp b/src/Interpreters/InterpreterUndropQuery.cpp index 920df3d6aed..8f935e951ef 100644 --- a/src/Interpreters/InterpreterUndropQuery.cpp +++ b/src/Interpreters/InterpreterUndropQuery.cpp @@ -64,7 +64,7 @@ BlockIO InterpreterUndropQuery::executeToTable(ASTUndropQuery & query) database->checkMetadataFilenameAvailability(table_id.table_name); - DatabaseCatalog::instance().dequeueDroppedTableCleanup(table_id); + DatabaseCatalog::instance().undropTable(table_id); return {}; } diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 20451fb20ad..48c9988b6fc 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -73,66 +73,55 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v return false; } -void OptimizeIfWithConstantConditionVisitor::visit(ASTPtr & current_ast) +void OptimizeIfWithConstantConditionVisitorData::visit(ASTFunction & function_node, ASTPtr & ast) { - if (!current_ast) - return; - checkStackSize(); - for (ASTPtr & child : current_ast->children) + if (function_node.name != "if") + return; + + if (!function_node.arguments) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)"); + + if (function_node.arguments->children.size() != 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong number of arguments for function 'if' ({} instead of 3)", + function_node.arguments->children.size()); + + const auto * args = function_node.arguments->as(); + + ASTPtr condition_expr = args->children[0]; + ASTPtr then_expr = args->children[1]; + ASTPtr else_expr = args->children[2]; + + bool condition; + if (tryExtractConstValueFromCondition(condition_expr, condition)) { - auto * function_node = child->as(); - if (!function_node || function_node->name != "if") + ASTPtr replace_ast = condition ? then_expr : else_expr; + ASTPtr child_copy = ast; + String replace_alias = replace_ast->tryGetAlias(); + String if_alias = ast->tryGetAlias(); + + if (replace_alias.empty()) { - visit(child); - continue; + replace_ast->setAlias(if_alias); + ast = replace_ast; + } + else + { + /// Only copy of one node is required here. + /// But IAST has only method for deep copy of subtree. + /// This can be a reason of performance degradation in case of deep queries. + ASTPtr replace_ast_deep_copy = replace_ast->clone(); + replace_ast_deep_copy->setAlias(if_alias); + ast = replace_ast_deep_copy; } - if (!function_node->arguments) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)"); - - if (function_node->arguments->children.size() != 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Wrong number of arguments for function 'if' ({} instead of 3)", - function_node->arguments->children.size()); - - visit(function_node->arguments); - const auto * args = function_node->arguments->as(); - - ASTPtr condition_expr = args->children[0]; - ASTPtr then_expr = args->children[1]; - ASTPtr else_expr = args->children[2]; - - bool condition; - if (tryExtractConstValueFromCondition(condition_expr, condition)) + if (!if_alias.empty()) { - ASTPtr replace_ast = condition ? then_expr : else_expr; - ASTPtr child_copy = child; - String replace_alias = replace_ast->tryGetAlias(); - String if_alias = child->tryGetAlias(); - - if (replace_alias.empty()) - { - replace_ast->setAlias(if_alias); - child = replace_ast; - } - else - { - /// Only copy of one node is required here. - /// But IAST has only method for deep copy of subtree. - /// This can be a reason of performance degradation in case of deep queries. - ASTPtr replace_ast_deep_copy = replace_ast->clone(); - replace_ast_deep_copy->setAlias(if_alias); - child = replace_ast_deep_copy; - } - - if (!if_alias.empty()) - { - auto alias_it = aliases.find(if_alias); - if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) - alias_it->second = child; - } + auto alias_it = aliases.find(if_alias); + if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) + alias_it->second = ast; } } } diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h index ad98f92bafd..3b46f90f07c 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h @@ -1,23 +1,24 @@ #pragma once #include +#include namespace DB { - -/// It removes Function_if node from AST if condition is constant. -/// TODO: rewrite with InDepthNodeVisitor -class OptimizeIfWithConstantConditionVisitor +struct OptimizeIfWithConstantConditionVisitorData { -public: - explicit OptimizeIfWithConstantConditionVisitor(Aliases & aliases_) + using TypeToVisit = ASTFunction; + + explicit OptimizeIfWithConstantConditionVisitorData(Aliases & aliases_) : aliases(aliases_) {} - void visit(ASTPtr & ast); - + void visit(ASTFunction & function_node, ASTPtr & ast); private: Aliases & aliases; }; +/// It removes Function_if node from AST if condition is constant. +using OptimizeIfWithConstantConditionVisitor = InDepthNodeVisitor, false>; + } diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h index 8319d373f39..abece2604f2 100644 --- a/src/Interpreters/ProcessorsProfileLog.h +++ b/src/Interpreters/ProcessorsProfileLog.h @@ -25,11 +25,11 @@ struct ProcessorProfileLogElement String processor_name; /// Milliseconds spend in IProcessor::work() - UInt32 elapsed_us{}; + UInt64 elapsed_us{}; /// IProcessor::NeedData - UInt32 input_wait_elapsed_us{}; + UInt64 input_wait_elapsed_us{}; /// IProcessor::PortFull - UInt32 output_wait_elapsed_us{}; + UInt64 output_wait_elapsed_us{}; size_t input_rows{}; size_t input_bytes{}; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index bb8c415602f..fb80b12ee60 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -130,7 +131,7 @@ public: LOG_TRACE(log, "Reuse session from storage with session_id: {}, user_id: {}", key.second, key.first); - if (!session.unique()) + if (!isSharedPtrUnique(session)) throw Exception(ErrorCodes::SESSION_IS_LOCKED, "Session {} is locked by a concurrent client", session_id); return {session, false}; } @@ -156,7 +157,7 @@ public: return; } - if (!it->second.unique()) + if (!isSharedPtrUnique(it->second)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot close session {} with refcount {}", session_id, it->second.use_count()); sessions.erase(it); diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 25434d1103e..f8b6a6542cc 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB @@ -12,33 +11,24 @@ namespace ErrorCodes } Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) + : header(header_) + , min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) - , header(header_) { } Chunk Squashing::flush() { - if (!accumulated) - return {}; - - auto result = convertToChunk(accumulated.extract()); - chassert(result); - return result; + return convertToChunk(std::move(chunks_to_merge_vec)); } Chunk Squashing::squash(Chunk && input_chunk) { - if (!input_chunk) + if (!input_chunk.hasChunkInfo()) return Chunk(); - auto squash_info = input_chunk.getChunkInfos().extract(); - - if (!squash_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); - - return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos())); + const auto *info = getInfoFromChunk(input_chunk); + return squash(info->chunks); } Chunk Squashing::add(Chunk && input_chunk) @@ -47,37 +37,48 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; /// Just read block is already enough. - if (isEnoughSize(input_chunk)) + if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) { /// If no accumulated data, return just read block. - if (!accumulated) + if (chunks_to_merge_vec.empty()) { - accumulated.add(std::move(input_chunk)); - return convertToChunk(accumulated.extract()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + return res_chunk; } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Chunk res_chunk = convertToChunk(accumulated.extract()); - accumulated.add(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. - if (isEnoughSize()) + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(accumulated.extract()); - accumulated.add(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector - accumulated.add(std::move(input_chunk)); + expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize()) - return convertToChunk(accumulated.extract()); - + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + { + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + changeCurrentSize(0, 0); + chunks_to_merge_vec.clear(); + return res_chunk; + } return {}; } @@ -89,15 +90,14 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - // It is imortant that chunk is not empty, it has to have columns even if they are empty - auto aggr_chunk = Chunk(header.getColumns(), 0); - aggr_chunk.getChunkInfos().add(std::move(info)); - chassert(aggr_chunk); - return aggr_chunk; + chunks.clear(); + + return Chunk(header.cloneEmptyColumns(), 0, info); } -Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos) +Chunk Squashing::squash(std::vector & input_chunks) { + Chunk accumulated_chunk; std::vector mutable_columns = {}; size_t rows = 0; for (const Chunk & chunk : input_chunks) @@ -119,17 +119,35 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { const auto source_column = columns[j]; + mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } + accumulated_chunk.setColumns(std::move(mutable_columns), rows); + return accumulated_chunk; +} - Chunk result; - result.setColumns(std::move(mutable_columns), rows); - result.setChunkInfos(infos); - result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); +const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) +{ + const auto& info = chunk.getChunkInfo(); + const auto * agg_info = typeid_cast(info.get()); - chassert(result); - return result; + if (!agg_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + + return agg_info; +} + +void Squashing::expandCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows += rows; + accumulated_size.bytes += bytes; +} + +void Squashing::changeCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows = rows; + accumulated_size.bytes = bytes; } bool Squashing::isEnoughSize(size_t rows, size_t bytes) const @@ -138,28 +156,4 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } - -bool Squashing::isEnoughSize() const -{ - return isEnoughSize(accumulated.getRows(), accumulated.getBytes()); -}; - -bool Squashing::isEnoughSize(const Chunk & chunk) const -{ - return isEnoughSize(chunk.getNumRows(), chunk.bytes()); -} - -void Squashing::CurrentSize::add(Chunk && chunk) -{ - rows += chunk.getNumRows(); - bytes += chunk.bytes(); - chunks.push_back(std::move(chunk)); -} - -std::vector Squashing::CurrentSize::extract() -{ - auto result = std::move(chunks); - *this = {}; - return result; -} } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 64a9768a71f..d76cca60e41 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -8,18 +8,9 @@ namespace DB { -class ChunksToSquash : public ChunkInfoCloneable +struct ChunksToSquash : public ChunkInfo { -public: - ChunksToSquash() = default; - ChunksToSquash(const ChunksToSquash & other) - { - chunks.reserve(other.chunks.size()); - for (const auto & chunk: other.chunks) - chunks.push_back(chunk.clone()); - } - - std::vector chunks = {}; + mutable std::vector chunks = {}; }; /** Merging consecutive passed blocks to specified minimum size. @@ -45,35 +36,32 @@ public: static Chunk squash(Chunk && input_chunk); Chunk flush(); - void setHeader(Block header_) { header = std::move(header_); } - const Block & getHeader() const { return header; } - -private: - class CurrentSize + bool isDataLeft() + { + return !chunks_to_merge_vec.empty(); + } + + Block header; +private: + struct CurrentSize { - std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; - - public: - explicit operator bool () const { return !chunks.empty(); } - size_t getRows() const { return rows; } - size_t getBytes() const { return bytes; } - void add(Chunk && chunk); - std::vector extract(); }; - const size_t min_block_size_rows; - const size_t min_block_size_bytes; - Block header; + std::vector chunks_to_merge_vec = {}; + size_t min_block_size_rows; + size_t min_block_size_bytes; - CurrentSize accumulated; + CurrentSize accumulated_size; - static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); + static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - bool isEnoughSize() const; + static Chunk squash(std::vector & input_chunks); + + void expandCurrentSize(size_t rows, size_t bytes); + void changeCurrentSize(size_t rows, size_t bytes); bool isEnoughSize(size_t rows, size_t bytes) const; - bool isEnoughSize(const Chunk & chunk) const; Chunk convertToChunk(std::vector && chunks) const; }; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index f386e157b14..7d84efba1b5 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -356,10 +357,15 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf if (blob_storage_log) logs.emplace_back(blob_storage_log.get()); + bool should_prepare = global_context->getServerSettings().prepare_system_log_tables_on_startup; try { for (auto & log : logs) + { log->startup(); + if (should_prepare) + log->prepareTable(); + } } catch (...) { @@ -538,13 +544,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert_context->makeQueryContext(); addSettingsForQuery(insert_context, IAST::QueryKind::Insert); - InterpreterInsertQuery interpreter( - query_ptr, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 94cb8c3e2fd..0ac468b15ec 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -135,6 +135,12 @@ public: void stopFlushThread() override; + /** Creates new table if it does not exist. + * Renames old table if its structure is not suitable. + * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. + */ + void prepareTable() override; + protected: LoggerPtr log; @@ -145,12 +151,6 @@ protected: StoragePtr getStorage() const; - /** Creates new table if it does not exist. - * Renames old table if its structure is not suitable. - * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. - */ - void prepareTable() override; - /// Some tables can override settings for internal queries virtual void addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 9ca521a4ab3..6ec6a64b13d 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -233,7 +233,8 @@ void ThreadStatus::attachToGroupImpl(const ThreadGroupPtr & thread_group_) { /// Attach or init current thread to thread group and copy useful information from it thread_group = thread_group_; - thread_group->linkThread(thread_id); + if (!internal_thread) + thread_group->linkThread(thread_id); performance_counters.setParent(&thread_group->performance_counters); memory_tracker.setParent(&thread_group->memory_tracker); @@ -269,7 +270,8 @@ void ThreadStatus::detachFromGroup() /// Extract MemoryTracker out from query and user context memory_tracker.setParent(&total_memory_tracker); - thread_group->unlinkThread(); + if (!internal_thread) + thread_group->unlinkThread(); thread_group.reset(); diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b88d75cd5a2..b872eb94fde 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -577,7 +577,8 @@ void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_ optimizeMultiIfToIf(query); /// Optimize if with constant condition after constants was substituted instead of scalar subqueries. - OptimizeIfWithConstantConditionVisitor(aliases).visit(query); + OptimizeIfWithConstantConditionVisitorData visitor_data(aliases); + OptimizeIfWithConstantConditionVisitor(visitor_data).visit(query); if (if_chain_to_multiif) OptimizeIfChainsVisitor().visit(query); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 6ce6f5e454e..a3c5a7ed3ed 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } - /// Check for dynamic subcolumns in unknown required columns. + /// Check for dynamic subcolums in unknown required columns. if (!unknown_required_source_columns.empty()) { for (const NameAndTypePair & pair : source_columns_ordinary) diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index e372f036073..9c3f85128cf 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -237,6 +237,7 @@ private: Int64 timeout_seconds = 120; bool is_replicated_database = false; bool throw_on_timeout = true; + bool throw_on_timeout_only_active = false; bool only_running_hosts = false; bool timeout_exceeded = false; @@ -316,8 +317,8 @@ DDLQueryStatusSource::DDLQueryStatusSource( , log(getLogger("DDLQueryStatusSource")) { auto output_mode = context->getSettingsRef().distributed_ddl_output_mode; - throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE - || output_mode == DistributedDDLOutputMode::NONE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; + throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE; + throw_on_timeout_only_active = output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; if (hosts_to_wait) { @@ -451,7 +452,7 @@ Chunk DDLQueryStatusSource::generate() "({} of them are currently executing the task, {} are inactive). " "They are going to execute the query in background. Was waiting for {} seconds{}"; - if (throw_on_timeout) + if (throw_on_timeout || (throw_on_timeout_only_active && !stop_waiting_offline_hosts)) { if (!first_exception) first_exception = std::make_unique(Exception(ErrorCodes::TIMEOUT_EXCEEDED, diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 9f33cbf1c27..d9d3ba58160 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -475,10 +475,9 @@ void logQueryFinish( processor_elem.processor_name = processor->getName(); - /// NOTE: convert this to UInt64 - processor_elem.elapsed_us = static_cast(processor->getElapsedUs()); - processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs()); - processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs()); + processor_elem.elapsed_us = static_cast(processor->getElapsedNs() / 1000U); + processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedNs() / 1000U); + processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedNs() / 1000U); auto stats = processor->getProcessorDataStats(); processor_elem.input_rows = stats.input_rows; diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp index 44167fe7242..dd20164925c 100644 --- a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp @@ -15,6 +15,10 @@ #include #include #include +#include +#include +#include +#include namespace DB @@ -38,6 +42,13 @@ static bool isAccessControlQuery(const ASTPtr & query) || query->as(); } +static bool isNamedCollectionQuery(const ASTPtr & query) +{ + return query->as() + || query->as() + || query->as(); +} + ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, const WithoutOnClusterASTRewriteParams & params) { auto * query_on_cluster = dynamic_cast(query.get()); @@ -50,7 +61,10 @@ ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, c && context->getUserDefinedSQLObjectsStorage().isReplicated()) || (isAccessControlQuery(query) && context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries - && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE))) + && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE)) + || (isNamedCollectionQuery(query) + && context->getSettings().ignore_on_cluster_for_replicated_named_collections_queries + && NamedCollectionFactory::instance().usesReplicatedStorage())) { LOG_DEBUG(getLogger("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID()); return query_on_cluster->getRewrittenASTWithoutOnCluster(params); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 602ef8c232b..f39229d7566 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -408,25 +408,26 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format { const char * operators[] = { - "multiply", " * ", - "divide", " / ", - "modulo", " % ", - "plus", " + ", - "minus", " - ", - "notEquals", " != ", - "lessOrEquals", " <= ", - "greaterOrEquals", " >= ", - "less", " < ", - "greater", " > ", - "equals", " = ", - "like", " LIKE ", - "ilike", " ILIKE ", - "notLike", " NOT LIKE ", - "notILike", " NOT ILIKE ", - "in", " IN ", - "notIn", " NOT IN ", - "globalIn", " GLOBAL IN ", - "globalNotIn", " GLOBAL NOT IN ", + "multiply", " * ", + "divide", " / ", + "modulo", " % ", + "plus", " + ", + "minus", " - ", + "notEquals", " != ", + "lessOrEquals", " <= ", + "greaterOrEquals", " >= ", + "less", " < ", + "greater", " > ", + "equals", " = ", + "isNotDistinctFrom", " <=> ", + "like", " LIKE ", + "ilike", " ILIKE ", + "notLike", " NOT LIKE ", + "notILike", " NOT ILIKE ", + "in", " IN ", + "notIn", " NOT IN ", + "globalIn", " GLOBAL IN ", + "globalNotIn", " GLOBAL NOT IN ", nullptr }; diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index e782bad797e..d22a4eca0fc 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -243,7 +243,7 @@ void ASTTableJoin::formatImplAfterTable(const FormatSettings & settings, FormatS void ASTTableJoin::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { formatImplBeforeTable(settings, state, frame); - settings.ostr << " ... "; + settings.ostr << " ..."; formatImplAfterTable(settings, state, frame); } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 5997452bcf3..d4fc9a4bc4d 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1129,11 +1129,11 @@ inline static bool makeHexOrBinStringLiteral(IParser::Pos & pos, ASTPtr & node, if (hex) { - hexStringDecode(str_begin, str_end, res_pos); + hexStringDecode(str_begin, str_end, res_pos, word_size); } else { - binStringDecode(str_begin, str_end, res_pos); + binStringDecode(str_begin, str_end, res_pos, word_size); } return makeStringLiteral(pos, node, String(reinterpret_cast(res.data()), (res_pos - res_begin - 1))); diff --git a/src/Parsers/isUnquotedIdentifier.cpp b/src/Parsers/isUnquotedIdentifier.cpp new file mode 100644 index 00000000000..6f2442635ec --- /dev/null +++ b/src/Parsers/isUnquotedIdentifier.cpp @@ -0,0 +1,20 @@ +#include + +#include + +namespace DB +{ + +bool isUnquotedIdentifier(const String & name) +{ + Lexer lexer(name.data(), name.data() + name.size()); + + auto maybe_ident = lexer.nextToken(); + + if (maybe_ident.type != TokenType::BareWord) + return false; + + return lexer.nextToken().isEnd(); +} + +} diff --git a/src/Parsers/isUnquotedIdentifier.h b/src/Parsers/isUnquotedIdentifier.h new file mode 100644 index 00000000000..839e5860ad3 --- /dev/null +++ b/src/Parsers/isUnquotedIdentifier.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +bool isUnquotedIdentifier(const String & name); + +} diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 2d42ed73223..49c4e264f7d 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -370,10 +371,10 @@ Aggregator::Params getAggregatorParams(const PlannerContextPtr & planner_context const auto & query_context = planner_context->getQueryContext(); const Settings & settings = query_context->getSettingsRef(); - const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( - select_query_info.query, + const auto stats_collecting_params = StatsCollectingParams( + calculateCacheKey(select_query_info.query), settings.collect_hash_table_stats_during_aggregation, - settings.max_entries_for_hash_table_stats, + query_context->getServerSettings().max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_aggregation); auto aggregate_descriptions = aggregation_analysis_result.aggregate_descriptions; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index d26092d57cb..d18342880f0 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -647,7 +647,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions(); - table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; + table_expression_query_info.current_table_chosen_for_reading_with_parallel_replicas + = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; size_t max_streams = settings.max_threads; size_t max_threads_execute_query = settings.max_threads; @@ -834,7 +835,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (row_policy_filter_info.actions) table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions); - if (query_context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY) + if (query_context->canUseParallelReplicasCustomKey()) { if (settings.parallel_replicas_count > 1) { @@ -843,9 +844,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres add_filter(parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); } else if (auto * distributed = typeid_cast(storage.get()); - distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) + distributed && query_context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) { planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); + /// We disable prefer_localhost_replica because if one of the replicas is local it will create a single local plan + /// instead of executing the query with multiple replicas + /// We can enable this setting again for custom key parallel replicas when we can generate a plan that will use both a + /// local plan and remote replicas + planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0}); } } @@ -857,6 +863,15 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres from_stage = storage->getQueryProcessingStage( query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); + /// It is just a safety check needed until we have a proper sending plan to replicas. + /// If we have a non-trivial storage like View it might create its own Planner inside read(), run findTableForParallelReplicas() + /// and find some other table that might be used for reading with parallel replicas. It will lead to errors. + const bool other_table_already_chosen_for_reading_with_parallel_replicas + = planner_context->getGlobalPlannerContext()->parallel_replicas_table + && !table_expression_query_info.current_table_chosen_for_reading_with_parallel_replicas; + if (other_table_already_chosen_for_reading_with_parallel_replicas) + planner_context->getMutableQueryContext()->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + storage->read( query_plan, columns_names, @@ -879,7 +894,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres }; /// query_plan can be empty if there is nothing to read - if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings) && query_context->canUseParallelReplicasOnInitiator()) + if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings)) { // (1) find read step QueryPlan::Node * node = query_plan.getRootNode(); @@ -906,54 +921,78 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } chassert(reading); - - // (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read - if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) + if (query_context->canUseParallelReplicasCustomKey() && query_context->getClientInfo().distributed_depth == 0) { - auto result_ptr = reading->selectRangesToRead(); - - UInt64 rows_to_read = result_ptr->selected_rows; - if (table_expression_query_info.trivial_limit > 0 && table_expression_query_info.trivial_limit < rows_to_read) - rows_to_read = table_expression_query_info.trivial_limit; - - if (max_block_size_limited && (max_block_size_limited < rows_to_read)) - rows_to_read = max_block_size_limited; - - const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; - LOG_TRACE( - getLogger("Planner"), - "Estimated {} rows to read. It is enough work for {} parallel replicas", - rows_to_read, - number_of_replicas_to_use); - - if (number_of_replicas_to_use <= 1) + if (auto cluster = query_context->getClusterForParallelReplicas(); + query_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) { - planner_context->getMutableQueryContext()->setSetting( - "allow_experimental_parallel_reading_from_replicas", Field(0)); - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); - LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); - } - else if (number_of_replicas_to_use < settings.max_parallel_replicas) - { - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); - LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); + planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0}); + auto modified_query_info = select_query_info; + modified_query_info.cluster = std::move(cluster); + from_stage = QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + QueryPlan query_plan_parallel_replicas; + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan_parallel_replicas, + storage->getStorageID(), + modified_query_info, + storage->getInMemoryMetadataPtr()->getColumns(), + storage_snapshot, + from_stage, + table_expression_query_info.query_tree, + query_context); + query_plan = std::move(query_plan_parallel_replicas); } } - - // (3) if parallel replicas still enabled - replace reading step - if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator()) + else if (query_context->canUseParallelReplicasOnInitiator()) { - from_stage = QueryProcessingStage::WithMergeableState; - QueryPlan query_plan_parallel_replicas; - ClusterProxy::executeQueryWithParallelReplicas( - query_plan_parallel_replicas, - storage->getStorageID(), - from_stage, - table_expression_query_info.query_tree, - table_expression_query_info.planner_context, - query_context, - table_expression_query_info.storage_limits); - query_plan = std::move(query_plan_parallel_replicas); + // (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read + if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) + { + auto result_ptr = reading->selectRangesToRead(); + + UInt64 rows_to_read = result_ptr->selected_rows; + if (table_expression_query_info.trivial_limit > 0 && table_expression_query_info.trivial_limit < rows_to_read) + rows_to_read = table_expression_query_info.trivial_limit; + + if (max_block_size_limited && (max_block_size_limited < rows_to_read)) + rows_to_read = max_block_size_limited; + + const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; + LOG_TRACE( + getLogger("Planner"), + "Estimated {} rows to read. It is enough work for {} parallel replicas", + rows_to_read, + number_of_replicas_to_use); + + if (number_of_replicas_to_use <= 1) + { + planner_context->getMutableQueryContext()->setSetting( + "allow_experimental_parallel_reading_from_replicas", Field(0)); + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); + LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); + } + else if (number_of_replicas_to_use < settings.max_parallel_replicas) + { + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); + LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); + } + } + + // (3) if parallel replicas still enabled - replace reading step + if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator()) + { + from_stage = QueryProcessingStage::WithMergeableState; + QueryPlan query_plan_parallel_replicas; + ClusterProxy::executeQueryWithParallelReplicas( + query_plan_parallel_replicas, + storage->getStorageID(), + from_stage, + table_expression_query_info.query_tree, + table_expression_query_info.planner_context, + query_context, + table_expression_query_info.storage_limits); + query_plan = std::move(query_plan_parallel_replicas); + } } } diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index e752c57b08b..05420cc3301 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -43,6 +43,8 @@ #include #include +#include + namespace DB { @@ -528,7 +530,7 @@ JoinClausesAndActions buildJoinClausesAndActions( size_t join_clause_key_nodes_size = join_clause.getLeftKeyNodes().size(); if (join_clause_key_nodes_size == 0) - throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "JOIN {} cannot get JOIN keys", + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot determine join keys in {}", join_node.formatASTForErrorMessage()); for (size_t i = 0; i < join_clause_key_nodes_size; ++i) @@ -768,10 +770,8 @@ std::shared_ptr tryDirectJoin(const std::shared_ptr(table_join, right_table_expression_header, storage, right_table_expression_header_with_storage_column_names); } - } - static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, std::shared_ptr & table_join, const QueryTreeNodePtr & right_table_expression, @@ -803,9 +803,17 @@ static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, algorithm == JoinAlgorithm::DEFAULT) { auto query_context = planner_context->getQueryContext(); - if (table_join->allowParallelHashJoin()) - return std::make_shared(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header); + { + const auto & settings = query_context->getSettingsRef(); + StatsCollectingParams params{ + calculateCacheKey(table_join, right_table_expression), + settings.collect_hash_table_stats_during_joins, + query_context->getServerSettings().max_entries_for_hash_table_stats, + settings.max_size_to_preallocate_for_joins}; + return std::make_shared( + query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header, params); + } return std::make_shared(table_join, right_table_expression_header, query_context->getSettingsRef().join_any_take_last_row); } diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 4466be5b3a7..5f6cf2f7230 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -19,6 +19,14 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns checkNumRowsIsConsistent(); } +Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(std::move(columns_)) + , num_rows(num_rows_) + , chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + static Columns unmuteColumns(MutableColumns && mutable_columns) { Columns columns; @@ -35,11 +43,17 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) checkNumRowsIsConsistent(); } +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(unmuteColumns(std::move(columns_))) + , num_rows(num_rows_) + , chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + Chunk Chunk::clone() const { - auto tmp = Chunk(getColumns(), getNumRows()); - tmp.setChunkInfos(chunk_infos.clone()); - return tmp; + return Chunk(getColumns(), getNumRows(), chunk_info); } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 1348966c0d3..4f753798eaa 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,9 +1,7 @@ #pragma once -#include #include - -#include +#include namespace DB { @@ -11,29 +9,11 @@ namespace DB class ChunkInfo { public: - using Ptr = std::shared_ptr; - - ChunkInfo() = default; - ChunkInfo(const ChunkInfo&) = default; - ChunkInfo(ChunkInfo&&) = default; - - virtual Ptr clone() const = 0; virtual ~ChunkInfo() = default; + ChunkInfo() = default; }; - -template -class ChunkInfoCloneable : public ChunkInfo -{ -public: - ChunkInfoCloneable() = default; - ChunkInfoCloneable(const ChunkInfoCloneable & other) = default; - - Ptr clone() const override - { - return std::static_pointer_cast(std::make_shared(*static_cast(this))); - } -}; +using ChunkInfoPtr = std::shared_ptr; /** * Chunk is a list of columns with the same length. @@ -52,26 +32,26 @@ public: class Chunk { public: - using ChunkInfoCollection = CollectionOfDerivedItems; - Chunk() = default; Chunk(const Chunk & other) = delete; Chunk(Chunk && other) noexcept : columns(std::move(other.columns)) , num_rows(other.num_rows) - , chunk_infos(std::move(other.chunk_infos)) + , chunk_info(std::move(other.chunk_info)) { other.num_rows = 0; } Chunk(Columns columns_, UInt64 num_rows_); + Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk(MutableColumns columns_, UInt64 num_rows_); + Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept { columns = std::move(other.columns); - chunk_infos = std::move(other.chunk_infos); + chunk_info = std::move(other.chunk_info); num_rows = other.num_rows; other.num_rows = 0; return *this; @@ -82,15 +62,15 @@ public: void swap(Chunk & other) noexcept { columns.swap(other.columns); + chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); - chunk_infos.swap(other.chunk_infos); } void clear() { num_rows = 0; columns.clear(); - chunk_infos.clear(); + chunk_info.reset(); } const Columns & getColumns() const { return columns; } @@ -101,9 +81,9 @@ public: /** Get empty columns with the same types as in block. */ MutableColumns cloneEmptyColumns() const; - ChunkInfoCollection & getChunkInfos() { return chunk_infos; } - const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; } - void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); } + const ChunkInfoPtr & getChunkInfo() const { return chunk_info; } + bool hasChunkInfo() const { return chunk_info != nullptr; } + void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); } UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } @@ -127,7 +107,7 @@ public: private: Columns columns; UInt64 num_rows = 0; - ChunkInfoCollection chunk_infos; + ChunkInfoPtr chunk_info; void checkNumRowsIsConsistent(); }; @@ -137,15 +117,11 @@ using Chunks = std::vector; /// AsyncInsert needs two kinds of information: /// - offsets of different sub-chunks /// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. -class AsyncInsertInfo : public ChunkInfoCloneable +class AsyncInsertInfo : public ChunkInfo { public: AsyncInsertInfo() = default; - AsyncInsertInfo(const AsyncInsertInfo & other) = default; - AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) - : offsets(offsets_) - , tokens(tokens_) - {} + explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} std::vector offsets; std::vector tokens; @@ -154,11 +130,9 @@ public: using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. -class ChunkMissingValues : public ChunkInfoCloneable +class ChunkMissingValues : public ChunkInfo { public: - ChunkMissingValues(const ChunkMissingValues & other) = default; - using RowsBitMask = std::vector; /// a bit per row for a column const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 27f6a454b24..6d5b60d8159 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -292,7 +292,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue } else if (last_status == IProcessor::Status::NeedData && status != IProcessor::Status::NeedData) { - processor.input_wait_elapsed_us += processor.input_wait_watch.elapsedMicroseconds(); + processor.input_wait_elapsed_ns += processor.input_wait_watch.elapsedNanoseconds(); } /// PortFull @@ -302,7 +302,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue } else if (last_status == IProcessor::Status::PortFull && status != IProcessor::Status::PortFull) { - processor.output_wait_elapsed_us += processor.output_wait_watch.elapsedMicroseconds(); + processor.output_wait_elapsed_ns += processor.output_wait_watch.elapsedNanoseconds(); } } } diff --git a/src/Processors/Executors/ExecutionThreadContext.cpp b/src/Processors/Executors/ExecutionThreadContext.cpp index 05669725f9a..17b6773ad83 100644 --- a/src/Processors/Executors/ExecutionThreadContext.cpp +++ b/src/Processors/Executors/ExecutionThreadContext.cpp @@ -103,10 +103,10 @@ bool ExecutionThreadContext::executeTask() if (profile_processors) { - UInt64 elapsed_microseconds = execution_time_watch->elapsedMicroseconds(); - node->processor->elapsed_us += elapsed_microseconds; + UInt64 elapsed_ns = execution_time_watch->elapsedNanoseconds(); + node->processor->elapsed_ns += elapsed_ns; if (trace_processors) - span->addAttribute("execution_time_ms", elapsed_microseconds); + span->addAttribute("execution_time_ms", elapsed_ns / 1000U); } #ifndef NDEBUG execution_time_ns += execution_time_watch->elapsed(); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d9fab88fe1f..d27002197d2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -147,10 +147,13 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto agg_info = chunk.getChunkInfos().get()) + if (auto chunk_info = chunk.getChunkInfo()) { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; + } } return true; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index 25c15d40c9a..cbf73c5cb07 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -73,10 +73,13 @@ bool PullingPipelineExecutor::pull(Block & block) } block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto agg_info = chunk.getChunkInfos().get()) + if (auto chunk_info = chunk.getChunkInfo()) { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; + } } return true; diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index ac5da172210..c5336f3bcc7 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -55,10 +56,25 @@ std::vector BinaryFormatReader::readNames() template std::vector BinaryFormatReader::readTypes() { - auto types = readHeaderRow(); - for (const auto & type_name : types) - read_data_types.push_back(DataTypeFactory::instance().get(type_name)); - return types; + read_data_types.reserve(read_columns); + Names type_names; + if (format_settings.binary.decode_types_in_binary_format) + { + type_names.reserve(read_columns); + for (size_t i = 0; i < read_columns; ++i) + { + read_data_types.push_back(decodeDataType(*in)); + type_names.push_back(read_data_types.back()->getName()); + } + } + else + { + type_names = readHeaderRow(); + for (const auto & type_name : type_names) + read_data_types.push_back(DataTypeFactory::instance().get(type_name)); + } + + return type_names; } template diff --git a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp index ff904f61d22..d4c2348d080 100644 --- a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -35,9 +36,15 @@ void BinaryRowOutputFormat::writePrefix() if (with_types) { - for (size_t i = 0; i < columns; ++i) + if (format_settings.binary.encode_types_in_binary_format) { - writeStringBinary(header.safeGetByPosition(i).type->getName(), out); + for (size_t i = 0; i < columns; ++i) + encodeDataType(header.safeGetByPosition(i).type, out); + } + else + { + for (size_t i = 0; i < columns; ++i) + writeStringBinary(header.safeGetByPosition(i).type->getName(), out); } } } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index dd7d6c6b024..b7f84748f61 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -616,7 +616,7 @@ void registerCSVSchemaReader(FormatFactory & factory) { String result = getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::CSV); if (!with_names) - result += fmt::format(", column_names_for_schema_inference={}, try_detect_header={}", settings.column_names_for_schema_inference, settings.csv.try_detect_header); + result += fmt::format(", column_names_for_schema_inference={}, try_detect_header={}, skip_first_lines={}", settings.column_names_for_schema_inference, settings.csv.try_detect_header, settings.csv.skip_first_lines); return result; }); } diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 9d056b42101..fe82d1b1c53 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -227,7 +227,12 @@ private: return true; } - String column_name = "_dummy_" + std::to_string(replaced_literals.size()); + /// When generating placeholder names, ensure that we use names + /// requiring quotes to be valid identifiers. This prevents the + /// tuple() function from generating named tuples. Otherwise, + /// inserting named tuples with different names into another named + /// tuple will result in only default values being inserted. + String column_name = "-dummy-" + std::to_string(replaced_literals.size()); replaced_literals.emplace_back(literal, column_name, force_nullable); setDataType(replaced_literals.back()); ast = std::make_shared(column_name); diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index a7a49ab6a8c..38fac60eef6 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -21,9 +21,7 @@ public: buf, header_, 0, - settings.skip_unknown_fields, - settings.null_as_default, - settings.native.allow_types_conversion, + settings, settings.defaults_for_omitted_fields ? &block_missing_values : nullptr)) , header(header_) {} @@ -72,9 +70,9 @@ private: class NativeOutputFormat final : public IOutputFormat { public: - NativeOutputFormat(WriteBuffer & buf, const Block & header, UInt64 client_protocol_version = 0) + NativeOutputFormat(WriteBuffer & buf, const Block & header, const FormatSettings & settings, UInt64 client_protocol_version = 0) : IOutputFormat(header, buf) - , writer(buf, client_protocol_version, header) + , writer(buf, client_protocol_version, header, settings) { } @@ -103,14 +101,17 @@ private: class NativeSchemaReader : public ISchemaReader { public: - explicit NativeSchemaReader(ReadBuffer & in_) : ISchemaReader(in_) {} + explicit NativeSchemaReader(ReadBuffer & in_, const FormatSettings & settings_) : ISchemaReader(in_), settings(settings_) {} NamesAndTypesList readSchema() override { - auto reader = NativeReader(in, 0); + auto reader = NativeReader(in, 0, settings); auto block = reader.read(); return block.getNamesAndTypesList(); } + +private: + const FormatSettings settings; }; @@ -134,16 +135,16 @@ void registerOutputFormatNative(FormatFactory & factory) const Block & sample, const FormatSettings & settings) { - return std::make_shared(buf, sample, settings.client_protocol_version); + return std::make_shared(buf, sample, settings, settings.client_protocol_version); }); } void registerNativeSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings &) + factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf); + return std::make_shared(buf, settings); }); } diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index dcd5a531b05..c10969b02b7 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -900,6 +900,11 @@ bool NativeORCBlockInputFormat::prepareStripeReader() orc::RowReaderOptions row_reader_options; row_reader_options.includeTypes(include_indices); + if (format_settings.orc.read_use_writer_time_zone) + { + String writer_time_zone = current_stripe_info->getWriterTimezone(); + row_reader_options.setTimezoneName(writer_time_zone); + } row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength()); if (format_settings.orc.filter_push_down && sarg) { diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9e499e2c400..a5d334f4f1d 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -179,9 +179,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); Chunks piece; - piece.emplace_back(std::move(columns), count); - piece.back().setChunkInfos(concatenated.getChunkInfos()); - + piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); writeRowGroup(std::move(piece)); } } diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 6d4dcba9e60..d2e17e92924 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -440,9 +440,10 @@ void registerTSVSchemaReader(FormatFactory & factory) settings, is_raw ? FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped); if (!with_names) result += fmt::format( - ", column_names_for_schema_inference={}, try_detect_header={}", + ", column_names_for_schema_inference={}, try_detect_header={}, skip_first_lines={}", settings.column_names_for_schema_inference, - settings.tsv.try_detect_header); + settings.tsv.try_detect_header, + settings.tsv.skip_first_lines); return result; }); } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 46be6e74693..4136fc5a5f2 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -8,9 +8,8 @@ namespace ErrorCodes } IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}) - , input(inputs.front()) - , output(outputs.front()) + : IProcessor({std::move(input_header)}, {std::move(output_header)}), + input(inputs.front()), output(outputs.front()) { } diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 6f779e7a8d4..02f7b6b3d12 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -303,9 +303,9 @@ public: IQueryPlanStep * getQueryPlanStep() const { return query_plan_step; } size_t getQueryPlanStepGroup() const { return query_plan_step_group; } - uint64_t getElapsedUs() const { return elapsed_us; } - uint64_t getInputWaitElapsedUs() const { return input_wait_elapsed_us; } - uint64_t getOutputWaitElapsedUs() const { return output_wait_elapsed_us; } + uint64_t getElapsedNs() const { return elapsed_ns; } + uint64_t getInputWaitElapsedNs() const { return input_wait_elapsed_ns; } + uint64_t getOutputWaitElapsedNs() const { return output_wait_elapsed_ns; } struct ProcessorDataStats { @@ -369,21 +369,21 @@ protected: private: /// For: - /// - elapsed_us + /// - elapsed_ns friend class ExecutionThreadContext; /// For - /// - input_wait_elapsed_us - /// - output_wait_elapsed_us + /// - input_wait_elapsed_ns + /// - output_wait_elapsed_ns friend class ExecutingGraph; std::string processor_description; /// For processors_profile_log - uint64_t elapsed_us = 0; + uint64_t elapsed_ns = 0; Stopwatch input_wait_watch; - uint64_t input_wait_elapsed_us = 0; + uint64_t input_wait_elapsed_ns = 0; Stopwatch output_wait_watch; - uint64_t output_wait_elapsed_us = 0; + uint64_t output_wait_elapsed_ns = 0; size_t stream_number = NO_STREAM; diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index 86675bcb237..466adf93538 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -53,11 +53,13 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - if (input.chunk.getChunkInfos().empty()) + const auto & info = input.chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); Int64 allocated_bytes = 0; - if (auto arenas_info = input.chunk.getChunkInfos().get()) + /// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator. + if (const auto * arenas_info = typeid_cast(info.get())) allocated_bytes = arenas_info->allocated_bytes; states[source_num] = State{input.chunk, description, allocated_bytes}; @@ -134,7 +136,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() info->chunk_num = chunk_num++; Chunk chunk; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } @@ -161,7 +163,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation() chunks.emplace_back(std::move(new_columns), current_rows); } - chunks.back().getChunkInfos().add(std::make_shared()); + chunks.back().setChunkInfo(std::make_shared()); states[i].current_row = states[i].to_row; /// We assume that sizes in bytes of rows are almost the same. diff --git a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h index e4f22deec8d..bcf4e759024 100644 --- a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h +++ b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h @@ -6,22 +6,18 @@ namespace DB { /// To carry part level if chunk is produced by a merge tree source -class MergeTreePartLevelInfo : public ChunkInfoCloneable +class MergeTreePartLevelInfo : public ChunkInfo { public: MergeTreePartLevelInfo() = delete; - explicit MergeTreePartLevelInfo(ssize_t part_level) - : origin_merge_tree_part_level(part_level) - { } - MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default; - + explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { } size_t origin_merge_tree_part_level = 0; }; inline size_t getPartLevelFromChunk(const Chunk & chunk) { - const auto part_level_info = chunk.getChunkInfos().get(); - if (part_level_info) + const auto & info = chunk.getChunkInfo(); + if (const auto * part_level_info = typeid_cast(info.get())) return part_level_info->origin_merge_tree_part_level; return 0; } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index cd347d371d9..7b2c7d82a01 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false) { - chunk->getChunkInfos().add(std::make_shared(std::move(chunk->replace_final_selection))); + chunk->setChunkInfo(std::make_shared(std::move(chunk->replace_final_selection))); return IMergingAlgorithm::Status(std::move(*chunk), finished); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2f23f2a5c4d..a3ccccf0845 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace Poco { @@ -15,13 +14,11 @@ namespace DB /** Use in skipping final to keep list of indices of selected row after merging final */ -struct ChunkSelectFinalIndices : public ChunkInfoCloneable +struct ChunkSelectFinalIndices : public ChunkInfo { - explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); - ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default; - const ColumnPtr column_holder; const ColumnUInt64 * select_final_indices = nullptr; + explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); }; /** Merges several sorted inputs into one. diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index b1b0182a113..fbb47969b2f 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare() bool is_port_full = !output.canPush(); /// Push if has data. - if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full) + if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full) output.push(std::move(state.output_chunk)); if (!is_initialized) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index be629271736..c218f622870 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -129,7 +129,7 @@ public: IMergingAlgorithm::Status status = algorithm.merge(); - if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty()) + if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo()) { // std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl; state.output_chunk = std::move(status.chunk); diff --git a/src/Processors/QueryPlan/BufferChunksTransform.cpp b/src/Processors/QueryPlan/BufferChunksTransform.cpp new file mode 100644 index 00000000000..3601a68d36e --- /dev/null +++ b/src/Processors/QueryPlan/BufferChunksTransform.cpp @@ -0,0 +1,85 @@ +#include + +namespace DB +{ + +BufferChunksTransform::BufferChunksTransform( + const Block & header_, + size_t max_rows_to_buffer_, + size_t max_bytes_to_buffer_, + size_t limit_) + : IProcessor({header_}, {header_}) + , input(inputs.front()) + , output(outputs.front()) + , max_rows_to_buffer(max_rows_to_buffer_) + , max_bytes_to_buffer(max_bytes_to_buffer_) + , limit(limit_) +{ +} + +IProcessor::Status BufferChunksTransform::prepare() +{ + if (output.isFinished()) + { + chunks = {}; + input.close(); + return Status::Finished; + } + + if (input.isFinished() && chunks.empty()) + { + output.finish(); + return Status::Finished; + } + + if (output.canPush()) + { + input.setNeeded(); + + if (!chunks.empty()) + { + auto chunk = std::move(chunks.front()); + chunks.pop(); + + num_buffered_rows -= chunk.getNumRows(); + num_buffered_bytes -= chunk.bytes(); + + output.push(std::move(chunk)); + } + else if (input.hasData()) + { + auto chunk = pullChunk(); + output.push(std::move(chunk)); + } + } + + if (input.hasData() && (num_buffered_rows < max_rows_to_buffer || num_buffered_bytes < max_bytes_to_buffer)) + { + auto chunk = pullChunk(); + num_buffered_rows += chunk.getNumRows(); + num_buffered_bytes += chunk.bytes(); + chunks.push(std::move(chunk)); + } + + if (num_buffered_rows >= max_rows_to_buffer && num_buffered_bytes >= max_bytes_to_buffer) + { + input.setNotNeeded(); + return Status::PortFull; + } + + input.setNeeded(); + return Status::NeedData; +} + +Chunk BufferChunksTransform::pullChunk() +{ + auto chunk = input.pull(); + num_processed_rows += chunk.getNumRows(); + + if (limit && num_processed_rows >= limit) + input.close(); + + return chunk; +} + +} diff --git a/src/Processors/QueryPlan/BufferChunksTransform.h b/src/Processors/QueryPlan/BufferChunksTransform.h new file mode 100644 index 00000000000..752f9910734 --- /dev/null +++ b/src/Processors/QueryPlan/BufferChunksTransform.h @@ -0,0 +1,42 @@ +#pragma once +#include +#include + +namespace DB +{ + +/// Transform that buffers chunks from the input +/// up to the certain limit and pushes chunks to +/// the output whenever it is ready. It can be used +/// to increase parallelism of execution, for example +/// when it is adeded before MergingSortedTransform. +class BufferChunksTransform : public IProcessor +{ +public: + /// OR condition is used for the limits on rows and bytes. + BufferChunksTransform( + const Block & header_, + size_t max_rows_to_buffer_, + size_t max_bytes_to_buffer_, + size_t limit_); + + Status prepare() override; + String getName() const override { return "BufferChunks"; } + +private: + Chunk pullChunk(); + + InputPort & input; + OutputPort & output; + + size_t max_rows_to_buffer; + size_t max_bytes_to_buffer; + size_t limit; + + std::queue chunks; + size_t num_buffered_rows = 0; + size_t num_buffered_bytes = 0; + size_t num_processed_rows = 0; +}; + +} diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index b1ab5561958..c48bdf1552a 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -46,6 +46,10 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes); /// Replace chain `FilterStep -> ExpressionStep` to single FilterStep size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &); +/// Replace chain `FilterStep -> FilterStep` to single FilterStep +/// Note: this breaks short-circuit logic, so it is disabled for now. +size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &); + /// Move FilterStep down if possible. /// May split FilterStep and push down only part of it. size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); @@ -81,11 +85,12 @@ size_t tryAggregatePartitionsIndependently(QueryPlan::Node * node, QueryPlan::No inline const auto & getOptimizations() { - static const std::array optimizations = {{ + static const std::array optimizations = {{ {tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::lift_up_array_join}, {tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::push_down_limit}, {trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::split_filter}, {tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::merge_expressions}, + {tryMergeFilters, "mergeFilters", &QueryPlanOptimizationSettings::merge_filters}, {tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down}, {tryConvertOuterJoinToInnerJoin, "convertOuterJoinToInnerJoin", &QueryPlanOptimizationSettings::convert_outer_join_to_inner_join}, {tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::execute_functions_after_sorting}, diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 2738de1ff5f..4d984133efd 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -20,6 +20,8 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.merge_expressions = from.query_plan_enable_optimizations && from.query_plan_merge_expressions; + settings.merge_filters = from.query_plan_enable_optimizations && from.query_plan_merge_filters; + settings.filter_push_down = from.query_plan_enable_optimizations && from.query_plan_filter_push_down; settings.convert_outer_join_to_inner_join = from.query_plan_enable_optimizations && from.query_plan_convert_outer_join_to_inner_join; diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index 85042cea4ed..539ff2eafbb 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -31,6 +31,9 @@ struct QueryPlanOptimizationSettings /// If merge-expressions optimization is enabled. bool merge_expressions = true; + /// If merge-filters optimization is enabled. + bool merge_filters = false; + /// If filter push down optimization is enabled. bool filter_push_down = true; diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp index 6ace1b3b5ce..118abdd701f 100644 --- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -34,7 +34,6 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) auto * parent_expr = typeid_cast(parent.get()); auto * parent_filter = typeid_cast(parent.get()); auto * child_expr = typeid_cast(child.get()); - auto * child_filter = typeid_cast(child.get()); if (parent_expr && child_expr) { @@ -76,7 +75,23 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) parent_node->children.swap(child_node->children); return 1; } - else if (parent_filter && child_filter) + + return 0; +} +size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &) +{ + if (parent_node->children.size() != 1) + return false; + + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent = parent_node->step; + auto & child = child_node->step; + + auto * parent_filter = typeid_cast(parent.get()); + auto * child_filter = typeid_cast(child.get()); + + if (parent_filter && child_filter) { const auto & child_actions = child_filter->getExpression(); const auto & parent_actions = parent_filter->getExpression(); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 537555afa2a..e1ef38022b5 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -919,15 +919,23 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) { auto & union_node = node.children.front(); - std::vector infos; + bool use_buffering = false; const SortDescription * max_sort_descr = nullptr; + + std::vector infos; infos.reserve(node.children.size()); + for (auto * child : union_node->children) { infos.push_back(buildInputOrderInfo(*sorting, *child, steps_to_update)); - if (infos.back() && (!max_sort_descr || max_sort_descr->size() < infos.back()->sort_description_for_merging.size())) - max_sort_descr = &infos.back()->sort_description_for_merging; + if (infos.back()) + { + if (!max_sort_descr || max_sort_descr->size() < infos.back()->sort_description_for_merging.size()) + max_sort_descr = &infos.back()->sort_description_for_merging; + + use_buffering |= infos.back()->limit == 0; + } } if (!max_sort_descr || max_sort_descr->empty()) @@ -972,12 +980,13 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) } } - sorting->convertToFinishSorting(*max_sort_descr); + sorting->convertToFinishSorting(*max_sort_descr, use_buffering); } else if (auto order_info = buildInputOrderInfo(*sorting, *node.children.front(), steps_to_update)) { - sorting->convertToFinishSorting(order_info->sort_description_for_merging); - /// update data stream's sorting properties + /// Use buffering only if have filter or don't have limit. + bool use_buffering = order_info->limit == 0; + sorting->convertToFinishSorting(order_info->sort_description_for_merging, use_buffering); updateStepsDataStreams(steps_to_update); } } @@ -1091,7 +1100,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, bool can_read = read_from_merge_tree->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit); if (!can_read) return 0; - sorting->convertToFinishSorting(order_info->sort_description_for_merging); + sorting->convertToFinishSorting(order_info->sort_description_for_merging, false); } return 0; diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 8f40e523b42..1c40f84d23d 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -8,6 +7,7 @@ #include #include #include +#include #include #include @@ -38,6 +38,7 @@ SortingStep::Settings::Settings(const Context & context) tmp_data = context.getTempDataOnDisk(); min_free_disk_space = settings.min_free_disk_space_for_temporary_data; max_block_bytes = settings.prefer_external_sort_block_bytes; + read_in_order_use_buffering = settings.read_in_order_use_buffering; } SortingStep::Settings::Settings(size_t max_block_size_) @@ -153,10 +154,11 @@ void SortingStep::updateLimit(size_t limit_) } } -void SortingStep::convertToFinishSorting(SortDescription prefix_description_) +void SortingStep::convertToFinishSorting(SortDescription prefix_description_, bool use_buffering_) { type = Type::FinishSorting; prefix_description = std::move(prefix_description_); + use_buffering = use_buffering_; } void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline) @@ -244,6 +246,14 @@ void SortingStep::mergingSorted(QueryPipelineBuilder & pipeline, const SortDescr /// If there are several streams, then we merge them into one if (pipeline.getNumStreams() > 1) { + if (use_buffering && sort_settings.read_in_order_use_buffering) + { + pipeline.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, sort_settings.max_block_size, sort_settings.max_block_bytes, limit_); + }); + } + auto transform = std::make_shared( pipeline.getHeader(), pipeline.getNumStreams(), @@ -373,9 +383,8 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 0 : limit)); if (need_finish_sorting) - { finishSorting(pipeline, prefix_description, result_description, limit); - } + return; } diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index 49dcf9f3121..b4a49394a13 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -28,6 +28,7 @@ public: TemporaryDataOnDiskScopePtr tmp_data = nullptr; size_t min_free_disk_space = 0; size_t max_block_bytes = 0; + size_t read_in_order_use_buffering = 0; explicit Settings(const Context & context); explicit Settings(size_t max_block_size_); @@ -80,7 +81,7 @@ public: const SortDescription & getSortDescription() const { return result_description; } - void convertToFinishSorting(SortDescription prefix_description); + void convertToFinishSorting(SortDescription prefix_description, bool use_buffering_); Type getType() const { return type; } const Settings & getSettings() const { return sort_settings; } @@ -126,6 +127,7 @@ private: UInt64 limit; bool always_read_till_end = false; + bool use_buffering = false; Settings sort_settings; diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h index c05cc1defcb..30cf958c072 100644 --- a/src/Processors/Sinks/RemoteSink.h +++ b/src/Processors/Sinks/RemoteSink.h @@ -20,7 +20,7 @@ public: } String getName() const override { return "RemoteSink"; } - void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); } + void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override { RemoteInserter::onFinish(); } }; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 36bb70f493f..5f9f9f9b1a1 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,8 +15,9 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - consume(chunk); - cur_chunk = std::move(chunk); + consume(chunk.clone()); + if (!lastBlockIsDuplicate()) + cur_chunk = std::move(chunk); } SinkToStorage::GenerateResult SinkToStorage::onGenerate() diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index c728fa87b1e..023bbd8b094 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -18,7 +18,8 @@ public: void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } protected: - virtual void consume(Chunk & chunk) = 0; + virtual void consume(Chunk chunk) = 0; + virtual bool lastBlockIsDuplicate() const { return false; } private: std::vector table_locks; @@ -37,7 +38,7 @@ class NullSinkToStorage : public SinkToStorage public: using SinkToStorage::SinkToStorage; std::string getName() const override { return "NullSinkToStorage"; } - void consume(Chunk &) override {} + void consume(Chunk) override {} }; using SinkPtr = std::shared_ptr; diff --git a/src/Processors/Sources/BlocksSource.h b/src/Processors/Sources/BlocksSource.h index 7ac460c14e2..ec0dc9609f1 100644 --- a/src/Processors/Sources/BlocksSource.h +++ b/src/Processors/Sources/BlocksSource.h @@ -43,10 +43,7 @@ protected: info->bucket_num = res.info.bucket_num; info->is_overflows = res.info.is_overflows; - auto chunk = Chunk(res.getColumns(), res.rows()); - chunk.getChunkInfos().add(std::move(info)); - - return chunk; + return Chunk(res.getColumns(), res.rows(), std::move(info)); } private: diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 37a84d9fe96..a3d6fd691d8 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -193,7 +193,15 @@ PostgreSQLSource::~PostgreSQLSource() { if (stream) { + /** Internally libpqxx::stream_from runs PostgreSQL copy query `COPY query TO STDOUT`. + * During transaction abort we try to execute PostgreSQL `ROLLBACK` command and if + * copy query is not cancelled, we wait until it finishes. + */ tx->conn().cancel_query(); + + /** If stream is not closed, libpqxx::stream_from closes stream in destructor, but that way + * exception is added into transaction pending error and we can potentially ignore exception message. + */ stream->close(); } diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 1578bd389c9..3d7dd3f76b8 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -176,7 +176,7 @@ std::optional RemoteSource::tryGenerate() auto info = std::make_shared(); info->bucket_num = block.info.bucket_num; info->is_overflows = block.info.is_overflows; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); } return chunk; diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 9abe0504d10..00f40a34361 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -5,9 +5,7 @@ namespace DB { -SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) -{ -} +SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { @@ -22,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp auto info = std::make_shared(); info->bucket_num = data.info.bucket_num; info->is_overflows = data.info.is_overflows; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 45b0960ec8f..9ffe15d0f85 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate() variants.aggregates_pool = variants.aggregates_pools.at(0).get(); /// Pass info about used memory by aggregate functions further. - to_push_chunk.getChunkInfos().add(std::make_shared(cur_block_bytes)); + to_push_chunk.setChunkInfo(std::make_shared(cur_block_bytes)); cur_block_bytes = 0; cur_block_size = 0; @@ -351,12 +351,11 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati void FinalizeAggregatedTransform::transform(Chunk & chunk) { if (params->final) - { finalizeChunk(chunk, aggregates_mask); - } - else if (!chunk.getChunkInfos().get()) + else if (!chunk.getChunkInfo()) { - chunk.getChunkInfos().add(std::make_shared()); + auto info = std::make_shared(); + chunk.setChunkInfo(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 41a0d7fc7f1..5d50e97f552 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB { @@ -13,12 +12,10 @@ namespace DB struct InputOrderInfo; using InputOrderInfoPtr = std::shared_ptr; -struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable +struct ChunkInfoWithAllocatedBytes : public ChunkInfo { - ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default; explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} - Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 517f035667f..cdbe194cfac 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block) UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } @@ -44,11 +44,15 @@ namespace { const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) { - auto agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); - return agg_info.get(); + return agg_info; } /// Reads chunks from file in native format. Provide chunks with aggregation info. @@ -206,7 +210,11 @@ private: void process(Chunk && chunk) { - auto chunks_to_merge = chunk.getChunkInfos().get(); + if (!chunk.hasChunkInfo()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName()); + + const auto & info = chunk.getChunkInfo(); + const auto * chunks_to_merge = typeid_cast(info.get()); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName()); @@ -775,7 +783,7 @@ void AggregatingTransform::initGenerate() { /// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes` static constexpr size_t oneMB = 1024 * 1024; - return std::make_shared(header, params->params.max_block_size, oneMB); + return std::make_shared(header, params->params.max_block_size, oneMB); }); } /// AggregatingTransform::expandPipeline expects single output port. diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 95983c39d1e..e167acde067 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -20,7 +19,7 @@ namespace CurrentMetrics namespace DB { -class AggregatedChunkInfo : public ChunkInfoCloneable +class AggregatedChunkInfo : public ChunkInfo { public: bool is_overflows = false; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 49a6581e685..965a084bb13 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -27,12 +27,18 @@ public: } ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } } protected: void onConsume(Chunk chunk) override { - cur_chunk = Squashing::squash(std::move(chunk)); + if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) + cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); } GenerateResult onGenerate() override @@ -42,10 +48,16 @@ protected: res.is_done = true; return res; } + void onFinish() override + { + auto chunk = DB::Squashing::squash({}); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + } private: Squashing squashing; Chunk cur_chunk; + Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/CheckConstraintsTransform.cpp b/src/Processors/Transforms/CheckConstraintsTransform.cpp index e43aa6028da..cdae8c23a3e 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.cpp +++ b/src/Processors/Transforms/CheckConstraintsTransform.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -31,6 +32,7 @@ CheckConstraintsTransform::CheckConstraintsTransform( , table_id(table_id_) , constraints_to_check(constraints_.filterConstraints(ConstraintsDescription::ConstraintType::CHECK)) , expressions(constraints_.getExpressions(context_, header.getNamesAndTypesList())) + , context(std::move(context_)) { } @@ -39,6 +41,10 @@ void CheckConstraintsTransform::onConsume(Chunk chunk) { if (chunk.getNumRows() > 0) { + if (rows_written == 0) + for (const auto & expression : expressions) + VirtualColumnUtils::buildSetsForDAG(expression->getActionsDAG(), context); + Block block_to_calculate = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); for (size_t i = 0; i < expressions.size(); ++i) { diff --git a/src/Processors/Transforms/CheckConstraintsTransform.h b/src/Processors/Transforms/CheckConstraintsTransform.h index 09833ff396b..f92d0ab855e 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.h +++ b/src/Processors/Transforms/CheckConstraintsTransform.h @@ -35,6 +35,7 @@ private: StorageID table_id; const ASTs constraints_to_check; const ConstraintsExpressions expressions; + ContextPtr context; size_t rows_written = 0; Chunk cur_chunk; }; diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 2c6b3bd8638..3dfb9fe178f 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,7 +1,6 @@ -#include -#include #include +#include #include #include diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp deleted file mode 100644 index 6786f76cbef..00000000000 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ /dev/null @@ -1,236 +0,0 @@ -#include - -#include - -#include -#include -#include - - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -void RestoreChunkInfosTransform::transform(Chunk & chunk) -{ - chunk.getChunkInfos().append(chunk_infos.clone()); -} - -namespace DeduplicationToken -{ - -String TokenInfo::getToken() const -{ - if (!isDefined()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken()); - - return getTokenImpl(); -} - -String TokenInfo::getTokenImpl() const -{ - String result; - result.reserve(getTotalSize()); - - for (const auto & part : parts) - { - if (!result.empty()) - result.append(":"); - result.append(part); - } - - return result; -} - -String TokenInfo::debugToken() const -{ - return getTokenImpl(); -} - -void TokenInfo::addChunkHash(String part) -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_WITH_HASHES; - - if (stage != DEFINE_SOURCE_WITH_HASHES) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(std::move(part)); -} - -void TokenInfo::finishChunkHashes() -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_WITH_HASHES; - - if (stage != DEFINE_SOURCE_WITH_HASHES) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - stage = DEFINED; -} - -void TokenInfo::setUserToken(const String & token) -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_USER_TOKEN; - - if (stage != DEFINE_SOURCE_USER_TOKEN) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("user-token-{}", token)); -} - -void TokenInfo::setSourceWithUserToken(size_t block_number) -{ - if (stage != DEFINE_SOURCE_USER_TOKEN) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("source-number-{}", block_number)); - - stage = DEFINED; -} - -void TokenInfo::setViewID(const String & id) -{ - if (stage == DEFINED) - stage = DEFINE_VIEW; - - if (stage != DEFINE_VIEW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("view-id-{}", id)); -} - -void TokenInfo::setViewBlockNumber(size_t block_number) -{ - if (stage != DEFINE_VIEW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("view-block-{}", block_number)); - - stage = DEFINED; -} - -void TokenInfo::reset() -{ - stage = UNDEFINED; - parts.clear(); -} - -void TokenInfo::addTokenPart(String part) -{ - parts.push_back(std::move(part)); -} - -size_t TokenInfo::getTotalSize() const -{ - if (parts.empty()) - return 0; - - size_t size = 0; - for (const auto & part : parts) - size += part.size(); - - // we reserve more size here to be able to add delimenter between parts. - return size + parts.size() - 1; -} - -#ifdef ABORT_ON_LOGICAL_ERROR -void CheckTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - - LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); -} -#endif - -String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk) -{ - SipHash hash; - for (const auto & colunm : chunk.getColumns()) - colunm->updateHashFast(hash); - - const auto hash_value = hash.get128(); - return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); -} - - -void DefineSourceWithChunkHashTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform"); - - if (token_info->isDefined()) - return; - - token_info->addChunkHash(getChunkHash(chunk)); - token_info->finishChunkHashes(); -} - -void SetUserTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetUserTokenTransform"); - token_info->setUserToken(user_token); -} - -void SetSourceBlockNumberTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->setSourceWithUserToken(block_number++); -} - -void SetViewIDTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetViewIDTransform"); - token_info->setViewID(view_id); -} - -void SetViewBlockNumberTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->setViewBlockNumber(block_number++); -} - -void ResetTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ResetTokenTransform"); - - token_info->reset(); -} - -} -} diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h deleted file mode 100644 index d6aff9e1370..00000000000 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ /dev/null @@ -1,237 +0,0 @@ -#pragma once - -#include -#include - -#include -#include "Common/Logger.h" - - -namespace DB -{ - class RestoreChunkInfosTransform : public ISimpleTransform - { - public: - RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , chunk_infos(std::move(chunk_infos_)) - {} - - String getName() const override { return "RestoreChunkInfosTransform"; } - - void transform(Chunk & chunk) override; - - private: - Chunk::ChunkInfoCollection chunk_infos; - }; - - -namespace DeduplicationToken -{ - class TokenInfo : public ChunkInfoCloneable - { - public: - TokenInfo() = default; - TokenInfo(const TokenInfo & other) = default; - - String getToken() const; - String debugToken() const; - - bool empty() const { return parts.empty(); } - - bool isDefined() const { return stage == DEFINED; } - - void addChunkHash(String part); - void finishChunkHashes(); - - void setUserToken(const String & token); - void setSourceWithUserToken(size_t block_number); - - void setViewID(const String & id); - void setViewBlockNumber(size_t block_number); - - void reset(); - - private: - String getTokenImpl() const; - - void addTokenPart(String part); - size_t getTotalSize() const; - - /* Token has to be prepared in a particular order. - * BuildingStage ensures that token is expanded according the following order. - * Firstly token is expanded with information about the source. - * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. - * - * transition // method - * UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash - * DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash - * DEFINE_SOURCE_WITH_HASHES -> DEFINED // defineSourceWithChankHashes - * - * transition // method - * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken - * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken - * - * After token is defined, it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. - * - * transition // method - * DEFINED -> DEFINE_VIEW // setViewID - * DEFINE_VIEW -> DEFINED // defineViewID - */ - - enum BuildingStage - { - UNDEFINED, - DEFINE_SOURCE_WITH_HASHES, - DEFINE_SOURCE_USER_TOKEN, - DEFINE_VIEW, - DEFINED, - }; - - BuildingStage stage = UNDEFINED; - std::vector parts; - }; - - -#ifdef ABORT_ON_LOGICAL_ERROR - /// use that class only with debug builds in CI for introspection - class CheckTokenTransform : public ISimpleTransform - { - public: - CheckTokenTransform(String debug_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , debug(std::move(debug_)) - { - } - - String getName() const override { return "DeduplicationToken::CheckTokenTransform"; } - - void transform(Chunk & chunk) override; - - private: - String debug; - LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); - }; -#endif - - - class AddTokenInfoTransform : public ISimpleTransform - { - public: - explicit AddTokenInfoTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared()); - } - }; - - - class DefineSourceWithChunkHashTransform : public ISimpleTransform - { - public: - explicit DefineSourceWithChunkHashTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } - - // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts. - // But if there is some table with different engine, we still need to define the source of the data in deduplication token - // We use that transform to define the source as a hash of entire block in deduplication token - void transform(Chunk & chunk) override; - - static String getChunkHash(const Chunk & chunk); - }; - - class ResetTokenTransform : public ISimpleTransform - { - public: - explicit ResetTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::ResetTokenTransform"; } - - void transform(Chunk & chunk) override; - }; - - - class SetUserTokenTransform : public ISimpleTransform - { - public: - SetUserTokenTransform(String user_token_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , user_token(std::move(user_token_)) - { - } - - String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; } - - void transform(Chunk & chunk) override; - - private: - String user_token; - }; - - - class SetSourceBlockNumberTransform : public ISimpleTransform - { - public: - explicit SetSourceBlockNumberTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; } - - void transform(Chunk & chunk) override; - - private: - size_t block_number = 0; - }; - - - class SetViewIDTransform : public ISimpleTransform - { - public: - SetViewIDTransform(String view_id_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , view_id(std::move(view_id_)) - { - } - - String getName() const override { return "DeduplicationToken::SetViewIDTransform"; } - - void transform(Chunk & chunk) override; - - private: - String view_id; - }; - - - class SetViewBlockNumberTransform : public ISimpleTransform - { - public: - explicit SetViewBlockNumberTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; } - - void transform(Chunk & chunk) override; - - private: - size_t block_number = 0; - }; - -} -} diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 04fabc9a3c6..2fbd2c21b8d 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,7 +1,5 @@ #include #include - - namespace DB { diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index ca204bcb482..3e2a9462e54 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -365,9 +365,10 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() return Status::Finished; } - task = data.chunk.getChunkInfos().get(); - if (!task) + if (!data.chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); + + task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else { @@ -478,7 +479,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) continue; Chunk chunk; - chunk.getChunkInfos().add(std::make_shared()); + chunk.setChunkInfo(std::make_shared()); output.push(std::move(chunk)); output.finish(); } @@ -495,7 +496,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { Chunk chunk; auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); - chunk.getChunkInfos().add(std::move(task)); + chunk.setChunkInfo(task); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index 5f6d9d6fff2..a308af03662 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,7 +1,6 @@ #pragma once #include -#include -#include + namespace DB { @@ -112,12 +111,11 @@ private: }; -class DelayedBlocksTask : public ChunkInfoCloneable +class DelayedBlocksTask : public ChunkInfo { public: DelayedBlocksTask() = default; - DelayedBlocksTask(const DelayedBlocksTask & other) = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 9ae80e21a68..1eaa5458d37 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,7 +1,6 @@ #include #include - namespace DB { diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index d7bc320173b..607087fb39c 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -150,7 +150,11 @@ private: if (!chunk.hasRows()) return; - const auto & agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception( ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index ea9ebb0f96e..fc40c6894bb 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_ auto info = std::make_shared(); info->bucket_num = bucket; info->is_overflows = is_overflows; - info->chunks = std::make_shared(std::move(chunks)); + info->chunks = std::make_unique(std::move(chunks)); Chunk chunk; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); output.push(std::move(chunk)); } @@ -255,10 +255,11 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) if (!chunk.hasRows()) return; - if (chunk.getChunkInfos().empty()) + const auto & info = chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); - if (auto agg_info = chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(info.get())) { Int32 bucket = agg_info->bucket_num; bool is_overflows = agg_info->is_overflows; @@ -274,7 +275,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (chunk.getChunkInfos().get()) + else if (typeid_cast(info.get())) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -303,11 +304,7 @@ void GroupingAggregatedTransform::work() Int32 bucket = cur_block.info.bucket_num; auto chunk_info = std::make_shared(); chunk_info->bucket_num = bucket; - - auto chunk = Chunk(cur_block.getColumns(), cur_block.rows()); - chunk.getChunkInfos().add(std::move(chunk_info)); - - chunks_map[bucket].emplace_back(std::move(chunk)); + chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); } } } @@ -322,7 +319,9 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform( void MergingAggregatedBucketTransform::transform(Chunk & chunk) { - auto chunks_to_merge = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + const auto * chunks_to_merge = typeid_cast(info.get()); + if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); @@ -331,10 +330,11 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) BlocksList blocks_list; for (auto & cur_chunk : *chunks_to_merge->chunks) { - if (cur_chunk.getChunkInfos().empty()) + const auto & cur_info = cur_chunk.getChunkInfo(); + if (!cur_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); - if (auto agg_info = cur_chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = agg_info->is_overflows; @@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (cur_chunk.getChunkInfos().get()) + else if (typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; @@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->is_overflows = chunks_to_merge->is_overflows; res_info->bucket_num = chunks_to_merge->bucket_num; res_info->chunk_num = chunks_to_merge->chunk_num; - chunk.getChunkInfos().add(std::move(res_info)); + chunk.setChunkInfo(std::move(res_info)); auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled); @@ -405,7 +405,11 @@ bool SortingAggregatedTransform::tryPushChunk() void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { - auto agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 3a3c1bd9c1e..77ee3034ffc 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -143,9 +142,9 @@ private: void addChunk(Chunk chunk, size_t from_input); }; -struct ChunksToMerge : public ChunkInfoCloneable +struct ChunksToMerge : public ChunkInfo { - std::shared_ptr chunks; + std::unique_ptr chunks; Int32 bucket_num = -1; bool is_overflows = false; UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index 446e60a0b81..ad723da7527 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -32,10 +32,11 @@ void MergingAggregatedTransform::consume(Chunk chunk) total_input_rows += input_rows; ++total_input_blocks; - if (chunk.getChunkInfos().empty()) + const auto & info = chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); - if (auto agg_info = chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(info.get())) { /** If the remote servers used a two-level aggregation method, * then blocks will contain information about the number of the bucket. @@ -48,7 +49,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (chunk.getChunkInfos().get()) + else if (typeid_cast(info.get())) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; @@ -88,8 +89,7 @@ Chunk MergingAggregatedTransform::generate() UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index ee4dfa6a64e..0f433165f14 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -10,20 +10,20 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform( - Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header_, header_) - , squashing(header_, min_block_size_rows, min_block_size_bytes) + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) { } void PlanSquashingTransform::consume(Chunk chunk) { - squashed_chunk = squashing.add(std::move(chunk)); + if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo()) + squashed_chunk.swap(current_chunk); } Chunk PlanSquashingTransform::generate() { - if (!squashed_chunk) + if (!squashed_chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); Chunk result_chunk; @@ -33,11 +33,12 @@ Chunk PlanSquashingTransform::generate() bool PlanSquashingTransform::canGenerate() { - return bool(squashed_chunk); + return squashed_chunk.hasChunkInfo(); } Chunk PlanSquashingTransform::getRemaining() { - return squashing.flush(); + Chunk current_chunk = squashing.flush(); + return current_chunk; } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index e6db245499e..4ad2ec2d089 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform { public: PlanSquashingTransform( - Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } @@ -23,6 +23,7 @@ protected: private: Squashing squashing; Chunk squashed_chunk; + Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/SelectByIndicesTransform.h b/src/Processors/Transforms/SelectByIndicesTransform.h index b44f5a3203e..480ab1a0f61 100644 --- a/src/Processors/Transforms/SelectByIndicesTransform.h +++ b/src/Processors/Transforms/SelectByIndicesTransform.h @@ -26,7 +26,7 @@ public: void transform(Chunk & chunk) override { size_t num_rows = chunk.getNumRows(); - auto select_final_indices_info = chunk.getChunkInfos().extract(); + const auto * select_final_indices_info = typeid_cast(chunk.getChunkInfo().get()); if (!select_final_indices_info || !select_final_indices_info->select_final_indices) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column"); @@ -41,6 +41,7 @@ public: chunk.setColumns(std::move(columns), index_column->size()); } + chunk.setChunkInfo(nullptr); } }; diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 1fb4433240a..b5a40c75c5b 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -7,6 +7,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; +extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } SquashingTransform::SquashingTransform( @@ -18,7 +19,9 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - cur_chunk = Squashing::squash(squashing.add(std::move(chunk))); + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -31,7 +34,10 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - finish_chunk = Squashing::squash(squashing.flush()); + Chunk chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } void SquashingTransform::work() @@ -44,7 +50,6 @@ void SquashingTransform::work() } ExceptionKeepingTransform::work(); - if (finish_chunk) { data.chunk = std::move(finish_chunk); @@ -52,49 +57,170 @@ void SquashingTransform::work() } } -SimpleSquashingTransform::SimpleSquashingTransform( +SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) { } -void SimpleSquashingTransform::transform(Chunk & chunk) +void SimpleSquashingChunksTransform::consume(Chunk chunk) { - if (!finished) - { - chunk = Squashing::squash(squashing.add(std::move(chunk))); - } - else - { - if (chunk.hasRows()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - - chunk = Squashing::squash(squashing.flush()); - } + Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); } -IProcessor::Status SimpleSquashingTransform::prepare() +Chunk SimpleSquashingChunksTransform::generate() { - if (!finished && input.isFinished()) + if (squashed_chunk.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); + + return std::move(squashed_chunk); +} + +bool SimpleSquashingChunksTransform::canGenerate() +{ + return !squashed_chunk.empty(); +} + +Chunk SimpleSquashingChunksTransform::getRemaining() +{ + Block current_block = squashing.add({}); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + return std::move(squashed_chunk); +} + +SquashingLegacy::SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) +{ +} + +Block SquashingLegacy::add(Block && input_block) +{ + return addImpl(std::move(input_block)); +} + +Block SquashingLegacy::add(const Block & input_block) +{ + return addImpl(input_block); +} + +/* + * To minimize copying, accept two types of argument: const reference for output + * stream, and rvalue reference for input stream, and decide whether to copy + * inside this function. This allows us not to copy Block unless we absolutely + * have to. + */ +template +Block SquashingLegacy::addImpl(ReferenceType input_block) +{ + /// End of input stream. + if (!input_block) { - if (output.isFinished()) - return Status::Finished; + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } - if (!output.canPush()) - return Status::PortFull; - - if (has_output) + /// Just read block is already enough. + if (isEnoughSize(input_block)) + { + /// If no accumulated data, return just read block. + if (!accumulated_block) { - output.pushData(std::move(output_data)); - has_output = false; - return Status::PortFull; + return std::move(input_block); } - finished = true; - /// On the next call to transform() we will return all data buffered in `squashing` (if any) - return Status::Ready; + /// Return accumulated data (maybe it has small size) and place new block to accumulated data. + Block to_return = std::move(input_block); + std::swap(to_return, accumulated_block); + return to_return; } - return ISimpleTransform::prepare(); + + /// Accumulated block is already enough. + if (isEnoughSize(accumulated_block)) + { + /// Return accumulated data and place new block to accumulated data. + Block to_return = std::move(input_block); + std::swap(to_return, accumulated_block); + return to_return; + } + + append(std::move(input_block)); + if (isEnoughSize(accumulated_block)) + { + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } + + /// Squashed block is not ready. + return {}; } + + +template +void SquashingLegacy::append(ReferenceType input_block) +{ + if (!accumulated_block) + { + accumulated_block = std::move(input_block); + return; + } + + assert(blocksHaveEqualStructure(input_block, accumulated_block)); + + try + { + for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + { + const auto source_column = input_block.getByPosition(i).column; + + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); + accumulated_block.getByPosition(i).column = std::move(mutable_column); + } + } + catch (...) + { + /// add() may be called again even after a previous add() threw an exception. + /// Keep accumulated_block in a valid state. + /// Seems ok to discard accumulated data because we're throwing an exception, which the caller will + /// hopefully interpret to mean "this block and all *previous* blocks are potentially lost". + accumulated_block.clear(); + throw; + } +} + + +bool SquashingLegacy::isEnoughSize(const Block & block) +{ + size_t rows = 0; + size_t bytes = 0; + + for (const auto & [column, type, name] : block) + { + if (!column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block."); + + if (!rows) + rows = column->size(); + else if (rows != column->size()) + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); + + bytes += column->byteSize(); + } + + return isEnoughSize(rows, bytes); +} + + +bool SquashingLegacy::isEnoughSize(size_t rows, size_t bytes) const +{ + return (!min_block_size_rows && !min_block_size_bytes) + || (min_block_size_rows && rows >= min_block_size_rows) + || (min_block_size_bytes && bytes >= min_block_size_bytes); +} + + } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index c5b727ac6ec..452317e7d5e 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -29,22 +30,51 @@ private: Chunk finish_chunk; }; -/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. -class SimpleSquashingTransform : public ISimpleTransform + +class SquashingLegacy { public: - explicit SimpleSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. + SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_); + + /** Add next block and possibly returns squashed block. + * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. + */ + Block add(Block && block); + Block add(const Block & block); + +private: + size_t min_block_size_rows; + size_t min_block_size_bytes; + + Block accumulated_block; + + template + Block addImpl(ReferenceType block); + + template + void append(ReferenceType block); + + bool isEnoughSize(const Block & block); + bool isEnoughSize(size_t rows, size_t bytes) const; +}; + +class SimpleSquashingChunksTransform : public IInflatingTransform +{ +public: + explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "SimpleSquashingTransform"; } protected: - void transform(Chunk &) override; - - IProcessor::Status prepare() override; + void consume(Chunk chunk) override; + bool canGenerate() override; + Chunk generate() override; + Chunk getRemaining() override; private: - Squashing squashing; - - bool finished = false; + SquashingLegacy squashing; + Chunk squashed_chunk; }; + } diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 59fceccb538..aa86879e62c 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -150,7 +150,11 @@ void TotalsHavingTransform::transform(Chunk & chunk) /// Block with values not included in `max_rows_to_group_by`. We'll postpone it. if (overflow_row) { - const auto & agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index b9f61d30182..a694fa43e46 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -17,6 +17,9 @@ #include #include +#include +#include + #include @@ -71,6 +74,9 @@ public: size_t function_index) const = 0; virtual std::optional getDefaultFrame() const { return {}; } + + /// Is the frame type supported by this function. + virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; } }; // Compares ORDER BY column values at given rows to find the boundaries of frame: @@ -402,6 +408,19 @@ WindowTransform::WindowTransform(const Block & input_header_, } } } + + for (const auto & workspace : workspaces) + { + if (workspace.window_function_impl) + { + if (!workspace.window_function_impl->checkWindowFrameType(this)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported window frame type for function '{}'", + workspace.aggregate_function->getName()); + } + } + + } } WindowTransform::~WindowTransform() @@ -1609,6 +1628,34 @@ struct WindowFunctionHelpers { recurrent_detail::setValueToOutputColumn(transform, function_index, value); } + + ALWAYS_INLINE static bool checkPartitionEnterFirstRow(const WindowTransform * transform) { return transform->current_row_number == 1; } + + ALWAYS_INLINE static bool checkPartitionEnterLastRow(const WindowTransform * transform) + { + /// This is for fast check. + if (!transform->partition_ended) + return false; + + auto current_row = transform->current_row; + /// checkPartitionEnterLastRow is called on each row, also move on current_row.row here. + current_row.row++; + const auto & partition_end_row = transform->partition_end; + + /// The partition end is reached, when following is true + /// - current row is the partition end row, + /// - or current row is the last row of all input. + if (current_row != partition_end_row) + { + /// when current row is not the partition end row, we need to check whether it's the last + /// input row. + if (current_row.row < transform->blockRowsNumber(current_row)) + return false; + if (partition_end_row.block != current_row.block + 1 || partition_end_row.row) + return false; + } + return true; + } }; template @@ -2058,8 +2105,6 @@ namespace const WindowTransform * transform, size_t function_index, const DataTypes & argument_types); - - static void checkWindowFrameType(const WindowTransform * transform); }; } @@ -2080,6 +2125,29 @@ struct WindowFunctionNtile final : public StatefulWindowFunction bool allocatesMemoryInArena() const override { return false; } + bool checkWindowFrameType(const WindowTransform * transform) const override + { + if (transform->order_by_indices.empty()) + { + LOG_ERROR(getLogger("WindowFunctionNtile"), "Window frame for 'ntile' function must have ORDER BY clause"); + return false; + } + + // We must wait all for the partition end and get the total rows number in this + // partition. So before the end of this partition, there is no any block could be + // dropped out. + bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded + && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; + if (!is_frame_supported) + { + LOG_ERROR( + getLogger("WindowFunctionNtile"), + "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + return false; + } + return true; + } + std::optional getDefaultFrame() const override { WindowFrame frame; @@ -2106,7 +2174,6 @@ namespace { if (!buckets) [[unlikely]] { - checkWindowFrameType(transform); const auto & current_block = transform->blockAt(transform->current_row); const auto & workspace = transform->workspaces[function_index]; const auto & arg_col = *current_block.original_input_columns[workspace.argument_column_indices[0]]; @@ -2128,7 +2195,7 @@ namespace } } // new partition - if (transform->current_row_number == 1) [[unlikely]] + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) [[unlikely]] { current_partition_rows = 0; current_partition_inserted_row = 0; @@ -2137,25 +2204,9 @@ namespace current_partition_rows++; // Only do the action when we meet the last row in this partition. - if (!transform->partition_ended) + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) return; - else - { - auto current_row = transform->current_row; - current_row.row++; - const auto & end_row = transform->partition_end; - if (current_row != end_row) - { - if (current_row.row < transform->blockRowsNumber(current_row)) - return; - if (end_row.block != current_row.block + 1 || end_row.row) - { - return; - } - // else, current_row is the last input row. - } - } auto bucket_capacity = current_partition_rows / buckets; auto capacity_diff = current_partition_rows - bucket_capacity * buckets; @@ -2193,23 +2244,115 @@ namespace bucket_num += 1; } } +} - void NtileState::checkWindowFrameType(const WindowTransform * transform) +namespace +{ +struct PercentRankState +{ + RowNumber start_row; + UInt64 current_partition_rows = 0; +}; +} + +struct WindowFunctionPercentRank final : public StatefulWindowFunction +{ +public: + WindowFunctionPercentRank(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + {} + + bool allocatesMemoryInArena() const override { return false; } + + bool checkWindowFrameType(const WindowTransform * transform) const override { - if (transform->order_by_indices.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause"); + if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE + || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded + || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Current) + { + LOG_ERROR( + getLogger("WindowFunctionPercentRank"), + "Window frame for function 'percent_rank' should be 'RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT'"); + return false; + } + return true; + } - // We must wait all for the partition end and get the total rows number in this - // partition. So before the end of this partition, there is no any block could be - // dropped out. - bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded - && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; - if (!is_frame_supported) + std::optional getDefaultFrame() const override + { + WindowFrame frame; + frame.type = WindowFrame::FrameType::RANGE; + frame.begin_type = WindowFrame::BoundaryType::Unbounded; + frame.end_type = WindowFrame::BoundaryType::Current; + return frame; + } + + void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override + { + auto & state = getWorkspaceState(transform, function_index); + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + state.current_partition_rows = 0; + state.start_row = transform->current_row; + } + + insertRankIntoColumn(transform, function_index); + state.current_partition_rows++; + + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) + { + return; + } + + UInt64 remaining_rows = state.current_partition_rows; + Float64 percent_rank_denominator = remaining_rows == 1 ? 1 : remaining_rows - 1; + + while (remaining_rows > 0) + { + auto block_rows_number = transform->blockRowsNumber(state.start_row); + auto available_block_rows = block_rows_number - state.start_row.row; + if (available_block_rows <= remaining_rows) + { + /// This partition involves multiple blocks. Finish current block and move on to the + /// next block. + auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; + auto & data = assert_cast(to_column).getData(); + for (size_t i = state.start_row.row; i < block_rows_number; ++i) + data[i] = (data[i] - 1) / percent_rank_denominator; + + state.start_row.block++; + state.start_row.row = 0; + remaining_rows -= available_block_rows; + } + else + { + /// The partition ends in current block.s + auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; + auto & data = assert_cast(to_column).getData(); + for (size_t i = state.start_row.row, n = state.start_row.row + remaining_rows; i < n; ++i) + { + data[i] = (data[i] - 1) / percent_rank_denominator; + } + state.start_row.row += remaining_rows; + remaining_rows = 0; + } } } -} + + + inline PercentRankState & getWorkspaceState(const WindowTransform * transform, size_t function_index) const + { + const auto & workspace = transform->workspaces[function_index]; + return getState(workspace); + } + + inline void insertRankIntoColumn(const WindowTransform * transform, size_t function_index) const + { + auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index]; + assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number)); + } +}; // ClickHouse-specific variant of lag/lead that respects the window frame. template @@ -2582,6 +2725,13 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) parameters); }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("percent_rank", {[](const std::string & name, + const DataTypes & argument_types, const Array & parameters, const Settings *) + { + return std::make_shared(name, argument_types, + parameters); + }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("row_number", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 312b333ab33..25fbf13b0e7 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -5,9 +5,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -18,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,12 +24,9 @@ #include #include #include -#include "base/defines.h" -#include #include #include -#include namespace ProfileEvents @@ -111,7 +105,7 @@ private: class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform { public: - ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_); + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); String getName() const override { return "ExecutingInnerQueryFromView"; } @@ -122,7 +116,6 @@ protected: private: ViewsDataPtr views_data; ViewRuntimeData & view; - bool disable_deduplication_for_children; struct State { @@ -145,7 +138,7 @@ class PushingToLiveViewSink final : public SinkToStorage public: PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToLiveViewSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; private: StorageLiveView & live_view; @@ -159,7 +152,7 @@ class PushingToWindowViewSink final : public SinkToStorage public: PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; private: StorageWindowView & window_view; @@ -223,10 +216,45 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); + // Do not deduplicate insertions into MV if the main insertion is Ok if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); } + else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && + !insert_settings.insert_deduplication_token.value.empty()) + { + /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle + * deduplication in complex INSERT flows. + * + * Example: + * + * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 + * | | + * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ + * + * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will + * be inserted into `ds_2_1`. + * + * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables + * are involved. + * + * Example: + * + * landing -┬--> mv_1_1 --┬-> ds_1_1 + * | | + * └--> mv_1_2 --┘ + * + */ + auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; + + if (view_id.hasUUID()) + insert_deduplication_token += "_" + toString(view_id.uuid); + else + insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); + + insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); + } // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) @@ -333,13 +361,7 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter( - nullptr, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; @@ -356,10 +378,6 @@ std::optional generateViewChain( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Before squashing", out.getInputHeader())); -#endif - auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); counting->setProgressCallback(insert_context->getProgressCallback()); @@ -402,19 +420,11 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right after Inner query", out.getInputHeader())); -#endif - auto executing_inner_query = std::make_shared( - storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); + storage_header, views_data->views.back(), views_data); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); - -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right before Inner query", out.getInputHeader())); -#endif } return out; @@ -455,7 +465,11 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views; + /// If the "root" table deduplicates blocks, there are no need to make deduplication for children + /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks + bool disable_deduplication_for_children = false; + if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); auto table_id = storage->getStorageID(); auto views = DatabaseCatalog::instance().getDependentViews(table_id); @@ -546,25 +560,12 @@ Chain buildPushingToViewsChain( auto sink = std::make_shared(live_view_header, *live_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } - else if (dynamic_cast(storage.get())) - { - auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); - metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); - sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -572,15 +573,8 @@ Chain buildPushingToViewsChain( auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - - result_chain.addSource(std::make_shared(sink->getHeader())); - result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(storage_header)); - } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -596,7 +590,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) { const auto & context = view.context; @@ -643,19 +637,6 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); - - if (!disable_deduplication_for_children) - { - String materialize_view_id = view.table_id.hasUUID() ? toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); - pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - } - else - { - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - } - return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -747,19 +728,17 @@ IProcessor::Status CopyingDataToViewsTransform::prepare() ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( const Block & header, ViewRuntimeData & view_, - std::shared_ptr views_data_, - bool disable_deduplication_for_children_) + std::shared_ptr views_data_) : ExceptionKeepingTransform(header, view_.sample_block) , views_data(std::move(views_data_)) , view(view_) - , disable_deduplication_for_children(disable_deduplication_for_children_) { } void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { - auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); - state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children)); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); + state.emplace(process(block, view, *views_data)); } @@ -791,10 +770,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi { } -void PushingToLiveViewSink::consume(Chunk & chunk) +void PushingToLiveViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -814,11 +793,11 @@ PushingToWindowViewSink::PushingToWindowViewSink( { } -void PushingToWindowViewSink::consume(Chunk & chunk) +void PushingToWindowViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index a9e5b1535c0..f0b2ead687e 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -193,7 +193,7 @@ public: return concurrency_control; } - void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); } + void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp index bb2be2c8ffb..2cd4dc42a83 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -5,7 +5,7 @@ namespace DB { -QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept +QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); @@ -16,12 +16,6 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolde return *this; } -QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept -{ - append(std::move(rhs)); - return *this; -} - QueryPlanResourceHolder::QueryPlanResourceHolder() = default; QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default; QueryPlanResourceHolder::~QueryPlanResourceHolder() = default; diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index 10f7f39ab09..ed9eb68b7ba 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -20,11 +20,8 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); - QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; - /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; - QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 1686a101bde..14457d2df43 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -104,8 +104,16 @@ RemoteQueryExecutor::RemoteQueryExecutor( connection_entries.emplace_back(std::move(result.entry)); } + else + { + chassert(!fail_message.empty()); + if (result.entry.isNull()) + LOG_DEBUG(log, "Failed to connect to replica {}. {}", pool->getAddress(), fail_message); + else + LOG_DEBUG(log, "Replica is not usable for remote query execution: {}. {}", pool->getAddress(), fail_message); + } - auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); + auto res = std::make_unique(std::move(connection_entries), context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); @@ -127,7 +135,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, &connection, throttler, extension_](AsyncCallback) { - auto res = std::make_unique(connection, context->getSettingsRef(), throttler); + auto res = std::make_unique(connection, context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -148,7 +156,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, connection_ptr, throttler, extension_](AsyncCallback) { - auto res = std::make_unique(connection_ptr, context->getSettingsRef(), throttler); + auto res = std::make_unique(connection_ptr, context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -169,7 +177,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, connections_, throttler, extension_](AsyncCallback) mutable { - auto res = std::make_unique(std::move(connections_), context->getSettingsRef(), throttler); + auto res = std::make_unique(std::move(connections_), context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -234,7 +242,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( timeouts, current_settings, pool_mode, std::move(async_callback), skip_unavailable_endpoints, priority_func); } - auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); + auto res = std::make_unique(std::move(connection_entries), context, throttler); if (extension && extension->replica_info) res->setReplicaInfo(*extension->replica_info); return res; diff --git a/src/Server/HTTP/authenticateUserByHTTP.cpp b/src/Server/HTTP/authenticateUserByHTTP.cpp new file mode 100644 index 00000000000..ac43bfd64c0 --- /dev/null +++ b/src/Server/HTTP/authenticateUserByHTTP.cpp @@ -0,0 +1,265 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#if USE_SSL +#include +#endif + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int AUTHENTICATION_FAILED; + extern const int BAD_ARGUMENTS; + extern const int SUPPORT_IS_DISABLED; +} + + +namespace +{ + /// Throws an exception that multiple authorization schemes are used simultaneously. + [[noreturn]] void throwMultipleAuthenticationMethods(std::string_view method1, std::string_view method2) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: it is not allowed to use {} and {} simultaneously", method1, method2); + } + + /// Checks that a specified user name is not empty, and throws an exception if it's empty. + void checkUserNameNotEmpty(const String & user_name, std::string_view method) + { + if (user_name.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Got an empty user name from {}", method); + } +} + + +bool authenticateUserByHTTP( + const HTTPServerRequest & request, + const HTMLForm & params, + HTTPServerResponse & response, + Session & session, + std::unique_ptr & request_credentials, + ContextPtr global_context, + LoggerPtr log) +{ + /// Get the credentials created by the previous call of authenticateUserByHTTP() while handling the previous HTTP request. + auto current_credentials = std::move(request_credentials); + + /// The user and password can be passed by headers (similar to X-Auth-*), + /// which is used by load balancers to pass authentication information. + std::string user = request.get("X-ClickHouse-User", ""); + std::string password = request.get("X-ClickHouse-Key", ""); + std::string quota_key = request.get("X-ClickHouse-Quota", ""); + bool has_auth_headers = !user.empty() || !password.empty(); + + /// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name + /// extracted from the SSL certificate used for this connection instead of checking password. + bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on"); + + /// User name and password can be passed using HTTP Basic auth or query parameters + /// (both methods are insecure). + bool has_http_credentials = request.hasCredentials(); + bool has_credentials_in_query_params = params.has("user") || params.has("password"); + + std::string spnego_challenge; + SSLCertificateSubjects certificate_subjects; + + if (has_ssl_certificate_auth) + { +#if USE_SSL + /// For SSL certificate authentication we extract the user name from the "X-ClickHouse-User" HTTP header. + checkUserNameNotEmpty(user, "X-ClickHouse HTTP headers"); + + /// It is prohibited to mix different authorization schemes. + if (!password.empty()) + throwMultipleAuthenticationMethods("SSL certificate authentication", "authentication via password"); + if (has_http_credentials) + throwMultipleAuthenticationMethods("SSL certificate authentication", "Authorization HTTP header"); + if (has_credentials_in_query_params) + throwMultipleAuthenticationMethods("SSL certificate authentication", "authentication via parameters"); + + if (request.havePeerCertificate()) + certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate()); + + if (certificate_subjects.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "SSL certificate authentication disabled because ClickHouse was built without SSL library"); +#endif + } + else if (has_auth_headers) + { + checkUserNameNotEmpty(user, "X-ClickHouse HTTP headers"); + + /// It is prohibited to mix different authorization schemes. + if (has_http_credentials) + throwMultipleAuthenticationMethods("X-ClickHouse HTTP headers", "Authorization HTTP header"); + if (has_credentials_in_query_params) + throwMultipleAuthenticationMethods("X-ClickHouse HTTP headers", "authentication via parameters"); + } + else if (has_http_credentials) + { + /// It is prohibited to mix different authorization schemes. + if (has_credentials_in_query_params) + throwMultipleAuthenticationMethods("Authorization HTTP header", "authentication via parameters"); + + std::string scheme; + std::string auth_info; + request.getCredentials(scheme, auth_info); + + if (Poco::icompare(scheme, "Basic") == 0) + { + Poco::Net::HTTPBasicCredentials credentials(auth_info); + user = credentials.getUsername(); + password = credentials.getPassword(); + checkUserNameNotEmpty(user, "Authorization HTTP header"); + } + else if (Poco::icompare(scheme, "Negotiate") == 0) + { + spnego_challenge = auth_info; + + if (spnego_challenge.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty"); + } + else + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); + } + } + else + { + /// If the user name is not set we assume it's the 'default' user. + user = params.get("user", "default"); + password = params.get("password", ""); + checkUserNameNotEmpty(user, "authentication via parameters"); + } + + if (!certificate_subjects.empty()) + { + chassert(!user.empty()); + if (!current_credentials) + current_credentials = std::make_unique(user, std::move(certificate_subjects)); + + auto * certificate_credentials = dynamic_cast(current_credentials.get()); + if (!certificate_credentials) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme"); + } + else if (!spnego_challenge.empty()) + { + if (!current_credentials) + current_credentials = global_context->makeGSSAcceptorContext(); + + auto * gss_acceptor_context = dynamic_cast(current_credentials.get()); + if (!gss_acceptor_context) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected"); + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + const auto spnego_response = base64Encode(gss_acceptor_context->processToken(base64Decode(spnego_challenge), log)); +#pragma clang diagnostic pop + + if (!spnego_response.empty()) + response.set("WWW-Authenticate", "Negotiate " + spnego_response); + + if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady()) + { + if (spnego_response.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure"); + + response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); + response.send(); + /// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request. + request_credentials = std::move(current_credentials); + return false; + } + } + else // I.e., now using user name and password strings ("Basic"). + { + if (!current_credentials) + current_credentials = std::make_unique(); + + auto * basic_credentials = dynamic_cast(current_credentials.get()); + if (!basic_credentials) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme"); + + chassert(!user.empty()); + basic_credentials->setUserName(user); + basic_credentials->setPassword(password); + } + + if (params.has("quota_key")) + { + if (!quota_key.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid authentication: it is not allowed " + "to use quota key as HTTP header and as parameter simultaneously"); + + quota_key = params.get("quota_key"); + } + + /// Set client info. It will be used for quota accounting parameters in 'setUser' method. + + session.setHTTPClientInfo(request); + session.setQuotaClientKey(quota_key); + + /// Extract the last entry from comma separated list of forwarded_for addresses. + /// Only the last proxy can be trusted (if any). + String forwarded_address = session.getClientInfo().getLastForwardedFor(); + try + { + if (!forwarded_address.empty() && global_context->getConfigRef().getBool("auth_use_forwarded_address", false)) + session.authenticate(*current_credentials, Poco::Net::SocketAddress(forwarded_address, request.clientAddress().port())); + else + session.authenticate(*current_credentials, request.clientAddress()); + } + catch (const Authentication::Require & required_credentials) + { + current_credentials = std::make_unique(); + + if (required_credentials.getRealm().empty()) + response.set("WWW-Authenticate", "Basic"); + else + response.set("WWW-Authenticate", "Basic realm=\"" + required_credentials.getRealm() + "\""); + + response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); + response.send(); + /// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request. + request_credentials = std::move(current_credentials); + return false; + } + catch (const Authentication::Require & required_credentials) + { + current_credentials = global_context->makeGSSAcceptorContext(); + + if (required_credentials.getRealm().empty()) + response.set("WWW-Authenticate", "Negotiate"); + else + response.set("WWW-Authenticate", "Negotiate realm=\"" + required_credentials.getRealm() + "\""); + + response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); + response.send(); + /// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request. + request_credentials = std::move(current_credentials); + return false; + } + + return true; +} + +} diff --git a/src/Server/HTTP/authenticateUserByHTTP.h b/src/Server/HTTP/authenticateUserByHTTP.h new file mode 100644 index 00000000000..3b5a04cae68 --- /dev/null +++ b/src/Server/HTTP/authenticateUserByHTTP.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class HTTPServerRequest; +class HTMLForm; +class HTTPServerResponse; +class Session; +class Credentials; + +/// Authenticates a user via HTTP protocol and initializes a session. +/// Usually retrieves the name and the password for that user from either the request's headers or from the query parameters. +/// Returns true when the user successfully authenticated, +/// the session instance will be configured accordingly, and the request_credentials instance will be dropped. +/// Returns false when the user is not authenticated yet, and the HTTP_UNAUTHORIZED response is sent with the "WWW-Authenticate" header, +/// in this case the `request_credentials` instance must be preserved until the next request or until any exception. +/// Throws an exception if authentication failed. +bool authenticateUserByHTTP( + const HTTPServerRequest & request, + const HTMLForm & params, + HTTPServerResponse & response, + Session & session, + std::unique_ptr & request_credentials, + ContextPtr global_context, + LoggerPtr log); + +} diff --git a/src/Server/HTTP/exceptionCodeToHTTPStatus.cpp b/src/Server/HTTP/exceptionCodeToHTTPStatus.cpp new file mode 100644 index 00000000000..6de57217aac --- /dev/null +++ b/src/Server/HTTP/exceptionCodeToHTTPStatus.cpp @@ -0,0 +1,158 @@ +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int CANNOT_PARSE_TEXT; + extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; + extern const int CANNOT_PARSE_QUOTED_STRING; + extern const int CANNOT_PARSE_DATE; + extern const int CANNOT_PARSE_DATETIME; + extern const int CANNOT_PARSE_NUMBER; + extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; + extern const int CANNOT_PARSE_IPV4; + extern const int CANNOT_PARSE_IPV6; + extern const int CANNOT_PARSE_UUID; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int CANNOT_SCHEDULE_TASK; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_COMPILE_REGEXP; + extern const int DUPLICATE_COLUMN; + extern const int ILLEGAL_COLUMN; + extern const int THERE_IS_NO_COLUMN; + extern const int UNKNOWN_ELEMENT_IN_AST; + extern const int UNKNOWN_TYPE_OF_AST_NODE; + extern const int TOO_DEEP_AST; + extern const int TOO_BIG_AST; + extern const int UNEXPECTED_AST_STRUCTURE; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; + + extern const int SYNTAX_ERROR; + + extern const int INCORRECT_DATA; + extern const int TYPE_MISMATCH; + + extern const int UNKNOWN_TABLE; + extern const int UNKNOWN_FUNCTION; + extern const int UNKNOWN_IDENTIFIER; + extern const int UNKNOWN_TYPE; + extern const int UNKNOWN_STORAGE; + extern const int UNKNOWN_DATABASE; + extern const int UNKNOWN_SETTING; + extern const int UNKNOWN_DIRECTION_OF_SORTING; + extern const int UNKNOWN_AGGREGATE_FUNCTION; + extern const int UNKNOWN_FORMAT; + extern const int UNKNOWN_DATABASE_ENGINE; + extern const int UNKNOWN_TYPE_OF_QUERY; + extern const int UNKNOWN_ROLE; + + extern const int QUERY_IS_TOO_LARGE; + + extern const int NOT_IMPLEMENTED; + extern const int SOCKET_TIMEOUT; + + extern const int UNKNOWN_USER; + extern const int WRONG_PASSWORD; + extern const int REQUIRED_PASSWORD; + extern const int AUTHENTICATION_FAILED; + extern const int SET_NON_GRANTED_ROLE; + + extern const int HTTP_LENGTH_REQUIRED; + + extern const int TIMEOUT_EXCEEDED; +} + + +Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code) +{ + using namespace Poco::Net; + + if (exception_code == ErrorCodes::REQUIRED_PASSWORD) + { + return HTTPResponse::HTTP_UNAUTHORIZED; + } + else if (exception_code == ErrorCodes::UNKNOWN_USER || + exception_code == ErrorCodes::WRONG_PASSWORD || + exception_code == ErrorCodes::AUTHENTICATION_FAILED || + exception_code == ErrorCodes::SET_NON_GRANTED_ROLE) + { + return HTTPResponse::HTTP_FORBIDDEN; + } + else if (exception_code == ErrorCodes::BAD_ARGUMENTS || + exception_code == ErrorCodes::CANNOT_COMPILE_REGEXP || + exception_code == ErrorCodes::CANNOT_PARSE_TEXT || + exception_code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE || + exception_code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING || + exception_code == ErrorCodes::CANNOT_PARSE_DATE || + exception_code == ErrorCodes::CANNOT_PARSE_DATETIME || + exception_code == ErrorCodes::CANNOT_PARSE_NUMBER || + exception_code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING || + exception_code == ErrorCodes::CANNOT_PARSE_IPV4 || + exception_code == ErrorCodes::CANNOT_PARSE_IPV6 || + exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || + exception_code == ErrorCodes::CANNOT_PARSE_UUID || + exception_code == ErrorCodes::DUPLICATE_COLUMN || + exception_code == ErrorCodes::ILLEGAL_COLUMN || + exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST || + exception_code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE || + exception_code == ErrorCodes::THERE_IS_NO_COLUMN || + exception_code == ErrorCodes::TOO_DEEP_AST || + exception_code == ErrorCodes::TOO_BIG_AST || + exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE || + exception_code == ErrorCodes::SYNTAX_ERROR || + exception_code == ErrorCodes::INCORRECT_DATA || + exception_code == ErrorCodes::TYPE_MISMATCH || + exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE) + { + return HTTPResponse::HTTP_BAD_REQUEST; + } + else if (exception_code == ErrorCodes::UNKNOWN_TABLE || + exception_code == ErrorCodes::UNKNOWN_FUNCTION || + exception_code == ErrorCodes::UNKNOWN_IDENTIFIER || + exception_code == ErrorCodes::UNKNOWN_TYPE || + exception_code == ErrorCodes::UNKNOWN_STORAGE || + exception_code == ErrorCodes::UNKNOWN_DATABASE || + exception_code == ErrorCodes::UNKNOWN_SETTING || + exception_code == ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING || + exception_code == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION || + exception_code == ErrorCodes::UNKNOWN_FORMAT || + exception_code == ErrorCodes::UNKNOWN_DATABASE_ENGINE || + exception_code == ErrorCodes::UNKNOWN_TYPE_OF_QUERY || + exception_code == ErrorCodes::UNKNOWN_ROLE) + { + return HTTPResponse::HTTP_NOT_FOUND; + } + else if (exception_code == ErrorCodes::QUERY_IS_TOO_LARGE) + { + return HTTPResponse::HTTP_REQUESTENTITYTOOLARGE; + } + else if (exception_code == ErrorCodes::NOT_IMPLEMENTED) + { + return HTTPResponse::HTTP_NOT_IMPLEMENTED; + } + else if (exception_code == ErrorCodes::SOCKET_TIMEOUT || + exception_code == ErrorCodes::CANNOT_OPEN_FILE) + { + return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; + } + else if (exception_code == ErrorCodes::HTTP_LENGTH_REQUIRED) + { + return HTTPResponse::HTTP_LENGTH_REQUIRED; + } + else if (exception_code == ErrorCodes::TIMEOUT_EXCEEDED) + { + return HTTPResponse::HTTP_REQUEST_TIMEOUT; + } + else if (exception_code == ErrorCodes::CANNOT_SCHEDULE_TASK) + { + return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; + } + + return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR; +} + +} diff --git a/src/Server/HTTP/exceptionCodeToHTTPStatus.h b/src/Server/HTTP/exceptionCodeToHTTPStatus.h new file mode 100644 index 00000000000..aadec7aac5a --- /dev/null +++ b/src/Server/HTTP/exceptionCodeToHTTPStatus.h @@ -0,0 +1,11 @@ +#pragma once +#include + + +namespace DB +{ + +/// Converts Exception code to HTTP status code. +Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code); + +} diff --git a/src/Server/HTTP/sendExceptionToHTTPClient.cpp b/src/Server/HTTP/sendExceptionToHTTPClient.cpp new file mode 100644 index 00000000000..78650758e35 --- /dev/null +++ b/src/Server/HTTP/sendExceptionToHTTPClient.cpp @@ -0,0 +1,80 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int HTTP_LENGTH_REQUIRED; + extern const int REQUIRED_PASSWORD; +} + + +void sendExceptionToHTTPClient( + const String & exception_message, + int exception_code, + HTTPServerRequest & request, + HTTPServerResponse & response, + WriteBufferFromHTTPServerResponse * out, + LoggerPtr log) +{ + setHTTPResponseStatusAndHeadersForException(exception_code, request, response, out, log); + + if (!out) + { + /// If nothing was sent yet. + WriteBufferFromHTTPServerResponse out_for_message{response, request.getMethod() == HTTPRequest::HTTP_HEAD, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT}; + + out_for_message.writeln(exception_message); + out_for_message.finalize(); + } + else + { + /// If buffer has data, and that data wasn't sent yet, then no need to send that data + bool data_sent = (out->count() != out->offset()); + + if (!data_sent) + out->position() = out->buffer().begin(); + + out->writeln(exception_message); + out->finalize(); + } +} + + +void setHTTPResponseStatusAndHeadersForException( + int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBufferFromHTTPServerResponse * out, LoggerPtr log) +{ + if (out) + out->setExceptionCode(exception_code); + else + response.set("X-ClickHouse-Exception-Code", toString(exception_code)); + + /// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body + /// to avoid reading part of the current request body in the next request. + if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() + && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) + { + try + { + if (!request.getStream().eof()) + request.getStream().ignoreAll(); + } + catch (...) + { + tryLogCurrentException(log, "Cannot read remaining request body during exception handling"); + response.setKeepAlive(false); + } + } + + if (exception_code == ErrorCodes::REQUIRED_PASSWORD) + response.requireAuthentication("ClickHouse server HTTP API"); + else + response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code)); +} +} diff --git a/src/Server/HTTP/sendExceptionToHTTPClient.h b/src/Server/HTTP/sendExceptionToHTTPClient.h new file mode 100644 index 00000000000..31fda88d900 --- /dev/null +++ b/src/Server/HTTP/sendExceptionToHTTPClient.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class HTTPServerRequest; +class HTTPServerResponse; +class WriteBufferFromHTTPServerResponse; + +/// Sends an exception to HTTP client. This function doesn't handle its own exceptions so it needs to be wrapped in try-catch. +/// Argument `out` may be either created from `response` or be nullptr (if it wasn't created before the exception). +void sendExceptionToHTTPClient( + const String & exception_message, + int exception_code, + HTTPServerRequest & request, + HTTPServerResponse & response, + WriteBufferFromHTTPServerResponse * out, + LoggerPtr log); + +/// Sets "X-ClickHouse-Exception-Code" header and the correspondent HTTP status in the response for an exception. +/// This is a part of what sendExceptionToHTTPClient() does. +void setHTTPResponseStatusAndHeadersForException( + int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBufferFromHTTPServerResponse * out, LoggerPtr log); +} diff --git a/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp new file mode 100644 index 00000000000..d42bd77e339 --- /dev/null +++ b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp @@ -0,0 +1,24 @@ +#include + +#include +#include + + +namespace DB +{ + +void setReadOnlyIfHTTPMethodIdempotent(ContextMutablePtr context, const String & http_method) +{ + /// Anything else beside HTTP POST should be readonly queries. + if (http_method != HTTPServerRequest::HTTP_POST) + { + /// 'readonly' setting values mean: + /// readonly = 0 - any query is allowed, client can change any setting. + /// readonly = 1 - only readonly queries are allowed, client can't change settings. + /// readonly = 2 - only readonly queries are allowed, client can change any setting except 'readonly'. + if (context->getSettingsRef().readonly == 0) + context->setSetting("readonly", 2); + } +} + +} diff --git a/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h new file mode 100644 index 00000000000..c46f2032d82 --- /dev/null +++ b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h @@ -0,0 +1,12 @@ +#pragma once + +#include + + +namespace DB +{ + +/// Sets readonly = 2 if the current HTTP method is not HTTP POST and if readonly is not set already. +void setReadOnlyIfHTTPMethodIdempotent(ContextMutablePtr context, const String & http_method); + +} diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index a00f6fb5412..370af79e456 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1,11 +1,6 @@ #include -#include #include -#include -#include -#include -#include #include #include #include @@ -37,20 +32,14 @@ #include #include #include +#include +#include +#include #include -#include -#include "config.h" - -#include -#include -#include #include -#include -#include -#include -#include -#include + +#include "config.h" #include #include @@ -60,78 +49,19 @@ #include #include -#if USE_SSL -#include -#endif - namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_PARSE_TEXT; - extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; - extern const int CANNOT_PARSE_QUOTED_STRING; - extern const int CANNOT_PARSE_DATE; - extern const int CANNOT_PARSE_DATETIME; - extern const int CANNOT_PARSE_NUMBER; - extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; - extern const int CANNOT_PARSE_IPV4; - extern const int CANNOT_PARSE_IPV6; - extern const int CANNOT_PARSE_UUID; - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - extern const int CANNOT_SCHEDULE_TASK; - extern const int DUPLICATE_COLUMN; - extern const int ILLEGAL_COLUMN; - extern const int THERE_IS_NO_COLUMN; - extern const int UNKNOWN_ELEMENT_IN_AST; - extern const int UNKNOWN_TYPE_OF_AST_NODE; - extern const int TOO_DEEP_AST; - extern const int TOO_BIG_AST; - extern const int UNEXPECTED_AST_STRUCTURE; - extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; - extern const int SYNTAX_ERROR; - - extern const int INCORRECT_DATA; - extern const int TYPE_MISMATCH; - - extern const int UNKNOWN_TABLE; - extern const int UNKNOWN_FUNCTION; - extern const int UNKNOWN_IDENTIFIER; - extern const int UNKNOWN_TYPE; - extern const int UNKNOWN_STORAGE; - extern const int UNKNOWN_DATABASE; - extern const int UNKNOWN_SETTING; - extern const int UNKNOWN_DIRECTION_OF_SORTING; - extern const int UNKNOWN_AGGREGATE_FUNCTION; - extern const int UNKNOWN_FORMAT; - extern const int UNKNOWN_DATABASE_ENGINE; - extern const int UNKNOWN_TYPE_OF_QUERY; - extern const int UNKNOWN_ROLE; extern const int NO_ELEMENTS_IN_CONFIG; - extern const int QUERY_IS_TOO_LARGE; - - extern const int NOT_IMPLEMENTED; - extern const int SOCKET_TIMEOUT; - - extern const int UNKNOWN_USER; - extern const int WRONG_PASSWORD; - extern const int REQUIRED_PASSWORD; - extern const int AUTHENTICATION_FAILED; - extern const int SET_NON_GRANTED_ROLE; - extern const int INVALID_SESSION_TIMEOUT; extern const int HTTP_LENGTH_REQUIRED; - extern const int SUPPORT_IS_DISABLED; - - extern const int TIMEOUT_EXCEEDED; } namespace @@ -173,115 +103,6 @@ void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::Laye } } -static String base64Decode(const String & encoded) -{ - String decoded; - Poco::MemoryInputStream istr(encoded.data(), encoded.size()); - Poco::Base64Decoder decoder(istr); - Poco::StreamCopier::copyToString(decoder, decoded); - return decoded; -} - -static String base64Encode(const String & decoded) -{ - std::ostringstream ostr; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - ostr.exceptions(std::ios::failbit); - Poco::Base64Encoder encoder(ostr); - encoder.rdbuf()->setLineLength(0); - encoder << decoded; - encoder.close(); - return ostr.str(); -} - -static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code) -{ - using namespace Poco::Net; - - if (exception_code == ErrorCodes::REQUIRED_PASSWORD) - { - return HTTPResponse::HTTP_UNAUTHORIZED; - } - else if (exception_code == ErrorCodes::UNKNOWN_USER || - exception_code == ErrorCodes::WRONG_PASSWORD || - exception_code == ErrorCodes::AUTHENTICATION_FAILED || - exception_code == ErrorCodes::SET_NON_GRANTED_ROLE) - { - return HTTPResponse::HTTP_FORBIDDEN; - } - else if (exception_code == ErrorCodes::BAD_ARGUMENTS || - exception_code == ErrorCodes::CANNOT_COMPILE_REGEXP || - exception_code == ErrorCodes::CANNOT_PARSE_TEXT || - exception_code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE || - exception_code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING || - exception_code == ErrorCodes::CANNOT_PARSE_DATE || - exception_code == ErrorCodes::CANNOT_PARSE_DATETIME || - exception_code == ErrorCodes::CANNOT_PARSE_NUMBER || - exception_code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING || - exception_code == ErrorCodes::CANNOT_PARSE_IPV4 || - exception_code == ErrorCodes::CANNOT_PARSE_IPV6 || - exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || - exception_code == ErrorCodes::CANNOT_PARSE_UUID || - exception_code == ErrorCodes::DUPLICATE_COLUMN || - exception_code == ErrorCodes::ILLEGAL_COLUMN || - exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST || - exception_code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE || - exception_code == ErrorCodes::THERE_IS_NO_COLUMN || - exception_code == ErrorCodes::TOO_DEEP_AST || - exception_code == ErrorCodes::TOO_BIG_AST || - exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE || - exception_code == ErrorCodes::SYNTAX_ERROR || - exception_code == ErrorCodes::INCORRECT_DATA || - exception_code == ErrorCodes::TYPE_MISMATCH || - exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE) - { - return HTTPResponse::HTTP_BAD_REQUEST; - } - else if (exception_code == ErrorCodes::UNKNOWN_TABLE || - exception_code == ErrorCodes::UNKNOWN_FUNCTION || - exception_code == ErrorCodes::UNKNOWN_IDENTIFIER || - exception_code == ErrorCodes::UNKNOWN_TYPE || - exception_code == ErrorCodes::UNKNOWN_STORAGE || - exception_code == ErrorCodes::UNKNOWN_DATABASE || - exception_code == ErrorCodes::UNKNOWN_SETTING || - exception_code == ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING || - exception_code == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION || - exception_code == ErrorCodes::UNKNOWN_FORMAT || - exception_code == ErrorCodes::UNKNOWN_DATABASE_ENGINE || - exception_code == ErrorCodes::UNKNOWN_TYPE_OF_QUERY || - exception_code == ErrorCodes::UNKNOWN_ROLE) - { - return HTTPResponse::HTTP_NOT_FOUND; - } - else if (exception_code == ErrorCodes::QUERY_IS_TOO_LARGE) - { - return HTTPResponse::HTTP_REQUESTENTITYTOOLARGE; - } - else if (exception_code == ErrorCodes::NOT_IMPLEMENTED) - { - return HTTPResponse::HTTP_NOT_IMPLEMENTED; - } - else if (exception_code == ErrorCodes::SOCKET_TIMEOUT || - exception_code == ErrorCodes::CANNOT_OPEN_FILE) - { - return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; - } - else if (exception_code == ErrorCodes::HTTP_LENGTH_REQUIRED) - { - return HTTPResponse::HTTP_LENGTH_REQUIRED; - } - else if (exception_code == ErrorCodes::TIMEOUT_EXCEEDED) - { - return HTTPResponse::HTTP_REQUEST_TIMEOUT; - } - else if (exception_code == ErrorCodes::CANNOT_SCHEDULE_TASK) - { - return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; - } - - return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR; -} - - static std::chrono::steady_clock::duration parseSessionTimeout( const Poco::Util::AbstractConfiguration & config, const HTMLForm & params) @@ -358,204 +179,9 @@ HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const HTTP HTTPHandler::~HTTPHandler() = default; -bool HTTPHandler::authenticateUser( - HTTPServerRequest & request, - HTMLForm & params, - HTTPServerResponse & response) +bool HTTPHandler::authenticateUser(HTTPServerRequest & request, HTMLForm & params, HTTPServerResponse & response) { - using namespace Poco::Net; - - /// The user and password can be passed by headers (similar to X-Auth-*), - /// which is used by load balancers to pass authentication information. - std::string user = request.get("X-ClickHouse-User", ""); - std::string password = request.get("X-ClickHouse-Key", ""); - std::string quota_key = request.get("X-ClickHouse-Quota", ""); - - /// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name - /// extracted from the SSL certificate used for this connection instead of checking password. - bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on"); - bool has_auth_headers = !user.empty() || !password.empty() || has_ssl_certificate_auth; - - /// User name and password can be passed using HTTP Basic auth or query parameters - /// (both methods are insecure). - bool has_http_credentials = request.hasCredentials(); - bool has_credentials_in_query_params = params.has("user") || params.has("password"); - - std::string spnego_challenge; - SSLCertificateSubjects certificate_subjects; - - if (has_auth_headers) - { - /// It is prohibited to mix different authorization schemes. - if (has_http_credentials) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use SSL certificate authentication and Authorization HTTP header simultaneously"); - if (has_credentials_in_query_params) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use SSL certificate authentication and authentication via parameters simultaneously simultaneously"); - - if (has_ssl_certificate_auth) - { -#if USE_SSL - if (!password.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use SSL certificate authentication and authentication via password simultaneously"); - - if (request.havePeerCertificate()) - certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate()); - - if (certificate_subjects.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name"); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "SSL certificate authentication disabled because ClickHouse was built without SSL library"); -#endif - } - } - else if (has_http_credentials) - { - /// It is prohibited to mix different authorization schemes. - if (has_credentials_in_query_params) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use Authorization HTTP header and authentication via parameters simultaneously"); - - std::string scheme; - std::string auth_info; - request.getCredentials(scheme, auth_info); - - if (Poco::icompare(scheme, "Basic") == 0) - { - HTTPBasicCredentials credentials(auth_info); - user = credentials.getUsername(); - password = credentials.getPassword(); - } - else if (Poco::icompare(scheme, "Negotiate") == 0) - { - spnego_challenge = auth_info; - - if (spnego_challenge.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty"); - } - else - { - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); - } - } - else - { - /// If the user name is not set we assume it's the 'default' user. - user = params.get("user", "default"); - password = params.get("password", ""); - } - - if (!certificate_subjects.empty()) - { - if (!request_credentials) - request_credentials = std::make_unique(user, std::move(certificate_subjects)); - - auto * certificate_credentials = dynamic_cast(request_credentials.get()); - if (!certificate_credentials) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme"); - } - else if (!spnego_challenge.empty()) - { - if (!request_credentials) - request_credentials = server.context()->makeGSSAcceptorContext(); - - auto * gss_acceptor_context = dynamic_cast(request_credentials.get()); - if (!gss_acceptor_context) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected"); - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunreachable-code" - const auto spnego_response = base64Encode(gss_acceptor_context->processToken(base64Decode(spnego_challenge), log)); -#pragma clang diagnostic pop - - if (!spnego_response.empty()) - response.set("WWW-Authenticate", "Negotiate " + spnego_response); - - if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady()) - { - if (spnego_response.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure"); - - response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); - response.send(); - return false; - } - } - else // I.e., now using user name and password strings ("Basic"). - { - if (!request_credentials) - request_credentials = std::make_unique(); - - auto * basic_credentials = dynamic_cast(request_credentials.get()); - if (!basic_credentials) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme"); - - basic_credentials->setUserName(user); - basic_credentials->setPassword(password); - } - - if (params.has("quota_key")) - { - if (!quota_key.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Invalid authentication: it is not allowed " - "to use quota key as HTTP header and as parameter simultaneously"); - - quota_key = params.get("quota_key"); - } - - /// Set client info. It will be used for quota accounting parameters in 'setUser' method. - - session->setHTTPClientInfo(request); - session->setQuotaClientKey(quota_key); - - /// Extract the last entry from comma separated list of forwarded_for addresses. - /// Only the last proxy can be trusted (if any). - String forwarded_address = session->getClientInfo().getLastForwardedFor(); - try - { - if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false)) - session->authenticate(*request_credentials, Poco::Net::SocketAddress(forwarded_address, request.clientAddress().port())); - else - session->authenticate(*request_credentials, request.clientAddress()); - } - catch (const Authentication::Require & required_credentials) - { - request_credentials = std::make_unique(); - - if (required_credentials.getRealm().empty()) - response.set("WWW-Authenticate", "Basic"); - else - response.set("WWW-Authenticate", "Basic realm=\"" + required_credentials.getRealm() + "\""); - - response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); - response.send(); - return false; - } - catch (const Authentication::Require & required_credentials) - { - request_credentials = server.context()->makeGSSAcceptorContext(); - - if (required_credentials.getRealm().empty()) - response.set("WWW-Authenticate", "Negotiate"); - else - response.set("WWW-Authenticate", "Negotiate realm=\"" + required_credentials.getRealm() + "\""); - - response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); - response.send(); - return false; - } - - request_credentials.reset(); - return true; + return authenticateUserByHTTP(request, params, response, *session, request_credentials, server.context(), log); } @@ -727,10 +353,22 @@ void HTTPHandler::processQuery( std::unique_ptr in; - static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", "role", - "buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session"}; + auto roles = params.getAll("role"); + if (!roles.empty()) + context->setCurrentRoles(roles); - Names reserved_param_suffixes; + std::string database = request.get("X-ClickHouse-Database", params.get("database", "")); + if (!database.empty()) + context->setCurrentDatabase(database); + + std::string default_format = request.get("X-ClickHouse-Format", params.get("default_format", "")); + if (!default_format.empty()) + context->setDefaultFormat(default_format); + + /// Anything else beside HTTP POST should be readonly queries. + setReadOnlyIfHTTPMethodIdempotent(context, request.getMethod()); + + bool has_external_data = startsWith(request.getContentType(), "multipart/form-data"); auto param_could_be_skipped = [&] (const String & name) { @@ -738,87 +376,36 @@ void HTTPHandler::processQuery( if (name.empty()) return true; + /// Some parameters (database, default_format, everything used in the code above) do not + /// belong to the Settings class. + static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", "role", + "buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session", + "database", "default_format"}; + if (reserved_param_names.contains(name)) return true; - for (const String & suffix : reserved_param_suffixes) + /// For external data we also want settings. + if (has_external_data) { - if (endsWith(name, suffix)) - return true; + /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. + /// It is a bug and ambiguity with `date_time_input_format` and `low_cardinality_allow_in_native_format` formats/settings. + static const Names reserved_param_suffixes = {"_format", "_types", "_structure"}; + for (const String & suffix : reserved_param_suffixes) + { + if (endsWith(name, suffix)) + return true; + } } return false; }; - auto roles = params.getAll("role"); - if (!roles.empty()) - { - const auto & access_control = context->getAccessControl(); - const auto & user = context->getUser(); - std::vector roles_ids(roles.size()); - for (size_t i = 0; i < roles.size(); i++) - { - auto role_id = access_control.getID(roles[i]); - if (user->granted_roles.isGranted(role_id)) - roles_ids[i] = role_id; - else - throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role {} should be granted to set as a current", roles[i].get()); - } - context->setCurrentRoles(roles_ids); - } - /// Settings can be overridden in the query. - /// Some parameters (database, default_format, everything used in the code above) do not - /// belong to the Settings class. - - /// 'readonly' setting values mean: - /// readonly = 0 - any query is allowed, client can change any setting. - /// readonly = 1 - only readonly queries are allowed, client can't change settings. - /// readonly = 2 - only readonly queries are allowed, client can change any setting except 'readonly'. - - /// In theory if initially readonly = 0, the client can change any setting and then set readonly - /// to some other value. - const auto & settings = context->getSettingsRef(); - - /// Anything else beside HTTP POST should be readonly queries. - if (request.getMethod() != HTTPServerRequest::HTTP_POST) - { - if (settings.readonly == 0) - context->setSetting("readonly", 2); - } - - bool has_external_data = startsWith(request.getContentType(), "multipart/form-data"); - - if (has_external_data) - { - /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. - reserved_param_suffixes.reserve(3); - /// It is a bug and ambiguity with `date_time_input_format` and `low_cardinality_allow_in_native_format` formats/settings. - reserved_param_suffixes.emplace_back("_format"); - reserved_param_suffixes.emplace_back("_types"); - reserved_param_suffixes.emplace_back("_structure"); - } - - std::string database = request.get("X-ClickHouse-Database", ""); - std::string default_format = request.get("X-ClickHouse-Format", ""); - SettingsChanges settings_changes; for (const auto & [key, value] : params) { - if (key == "database") - { - if (database.empty()) - database = value; - } - else if (key == "default_format") - { - if (default_format.empty()) - default_format = value; - } - else if (param_could_be_skipped(key)) - { - } - else + if (!param_could_be_skipped(key)) { /// Other than query parameters are treated as settings. if (!customizeQueryParam(context, key, value)) @@ -826,15 +413,9 @@ void HTTPHandler::processQuery( } } - if (!database.empty()) - context->setCurrentDatabase(database); - - if (!default_format.empty()) - context->setDefaultFormat(default_format); - - /// For external data we also want settings context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); context->applySettingsChanges(settings_changes); + const auto & settings = context->getSettingsRef(); /// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. context->setCurrentQueryId(params.get("query_id", request.get("X-ClickHouse-Query-Id", ""))); @@ -936,7 +517,7 @@ void HTTPHandler::processQuery( { bool with_stacktrace = (params.getParsed("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true)); ExecutionStatus status = ExecutionStatus::fromCurrentException("", with_stacktrace); - formatExceptionForClient(status.code, request, response, used_output); + setHTTPResponseStatusAndHeadersForException(status.code, request, response, used_output.out_holder.get(), log); current_output_format.setException(status.message); current_output_format.finalize(); used_output.exception_is_written = true; @@ -970,7 +551,7 @@ void HTTPHandler::trySendExceptionToClient( const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) try { - formatExceptionForClient(exception_code, request, response, used_output); + setHTTPResponseStatusAndHeadersForException(exception_code, request, response, used_output.out_holder.get(), log); if (!used_output.out_holder && !used_output.exception_is_written) { @@ -1032,38 +613,6 @@ catch (...) used_output.cancel(); } -void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) -{ - if (used_output.out_holder) - used_output.out_holder->setExceptionCode(exception_code); - else - response.set("X-ClickHouse-Exception-Code", toString(exception_code)); - - /// FIXME: make sure that no one else is reading from the same stream at the moment. - - /// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body - /// to avoid reading part of the current request body in the next request. - if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() - && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) - { - try - { - if (!request.getStream().eof()) - request.getStream().ignoreAll(); - } - catch (...) - { - tryLogCurrentException(log, "Cannot read remaining request body during exception handling"); - response.setKeepAlive(false); - } - } - - if (exception_code == ErrorCodes::REQUIRED_PASSWORD) - response.requireAuthentication("ClickHouse server HTTP API"); - else - response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code)); -} - void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { setThreadName("HTTPHandler"); diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index c78c45826f0..6580b317f6e 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -173,12 +173,6 @@ private: HTTPServerResponse & response, Output & used_output); - void formatExceptionForClient( - int exception_code, - HTTPServerRequest & request, - HTTPServerResponse & response, - Output & used_output); - static void pushDelayedResults(Output & used_output); }; diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 47064b467e7..5f26542d39c 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -2,31 +2,31 @@ #if USE_NURAFT -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include -#ifdef POCO_HAVE_FD_EPOLL - #include -#else - #include -#endif +# ifdef POCO_HAVE_FD_EPOLL +# include +# else +# include +# endif namespace ProfileEvents { @@ -400,13 +400,11 @@ void KeeperTCPHandler::runImpl() } auto response_fd = poll_wrapper->getResponseFD(); - auto response_callback = [responses_ = this->responses, response_fd](const Coordination::ZooKeeperResponsePtr & response) + auto response_callback = [my_responses = this->responses, + response_fd](const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr request) { - if (!responses_->push(response)) - throw Exception(ErrorCodes::SYSTEM_ERROR, - "Could not push response with xid {} and zxid {}", - response->xid, - response->zxid); + if (!my_responses->push(RequestWithResponse{response, std::move(request)})) + throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push response with xid {} and zxid {}", response->xid, response->zxid); UInt8 single_byte = 1; [[maybe_unused]] ssize_t result = write(response_fd, &single_byte, sizeof(single_byte)); @@ -470,19 +468,20 @@ void KeeperTCPHandler::runImpl() /// became inconsistent and race condition is possible. while (result.responses_count != 0) { - Coordination::ZooKeeperResponsePtr response; + RequestWithResponse request_with_response; - if (!responses->tryPop(response)) + if (!responses->tryPop(request_with_response)) throw Exception(ErrorCodes::LOGICAL_ERROR, "We must have ready response, but queue is empty. It's a bug."); log_long_operation("Waiting for response to be ready"); + auto & response = request_with_response.response; if (response->xid == close_xid) { LOG_DEBUG(log, "Session #{} successfully closed", session_id); return; } - updateStats(response); + updateStats(response, request_with_response.request); packageSent(); response->write(getWriteBuffer()); @@ -609,7 +608,7 @@ void KeeperTCPHandler::packageReceived() keeper_dispatcher->incrementPacketsReceived(); } -void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response) +void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response, const Coordination::ZooKeeperRequestPtr & request) { /// update statistics ignoring watch response and heartbeat. if (response->xid != Coordination::WATCH_XID && response->getOpNum() != Coordination::OpNum::Heartbeat) @@ -617,6 +616,16 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response Int64 elapsed = (Poco::Timestamp() - operations[response->xid]); ProfileEvents::increment(ProfileEvents::KeeperTotalElapsedMicroseconds, elapsed); Int64 elapsed_ms = elapsed / 1000; + + if (request && elapsed_ms > static_cast(keeper_dispatcher->getKeeperContext()->getCoordinationSettings()->log_slow_total_threshold_ms)) + { + LOG_INFO( + log, + "Total time to process a request took too long ({}ms).\nRequest info: {}", + elapsed, + request->toString(/*short_format=*/true)); + } + conn_stats.updateLatency(elapsed_ms); operations.erase(response->xid); diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index c1c522eee89..7c2b8acf624 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -26,7 +26,13 @@ namespace DB struct SocketInterruptablePollWrapper; using SocketInterruptablePollWrapperPtr = std::unique_ptr; -using ThreadSafeResponseQueue = ConcurrentBoundedQueue; +struct RequestWithResponse +{ + Coordination::ZooKeeperResponsePtr response; + Coordination::ZooKeeperRequestPtr request; /// it can be nullptr for some responses +}; + +using ThreadSafeResponseQueue = ConcurrentBoundedQueue; using ThreadSafeResponseQueuePtr = std::shared_ptr; struct LastOp; @@ -104,7 +110,7 @@ private: void packageSent(); void packageReceived(); - void updateStats(Coordination::ZooKeeperResponsePtr & response); + void updateStats(Coordination::ZooKeeperResponsePtr & response, const Coordination::ZooKeeperRequestPtr & request); Poco::Timestamp established; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fccea9e258e..8d69df8de76 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -888,11 +888,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { - squashing.setHeader(state.block_for_insert.cloneEmpty()); - auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); - if (result_chunk) + squashing.header = state.block_for_insert; + auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + if (planned_chunk.hasChunkInfo()) { - auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); + Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -901,13 +902,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - Chunk result_chunk = Squashing::squash(squashing.flush()); - if (!result_chunk) - { - return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); - } + auto planned_chunk = squashing.flush(); + Chunk result_chunk; + if (planned_chunk.hasChunkInfo()) + result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } @@ -2107,6 +2107,7 @@ void TCPHandler::initBlockOutput(const Block & block) *state.maybe_compressed_out, client_tcp_protocol_version, block.cloneEmpty(), + std::nullopt, !query_settings.low_cardinality_allow_in_native_format); } } @@ -2121,6 +2122,7 @@ void TCPHandler::initLogsBlockOutput(const Block & block) *out, client_tcp_protocol_version, block.cloneEmpty(), + std::nullopt, !query_settings.low_cardinality_allow_in_native_format); } } @@ -2135,6 +2137,7 @@ void TCPHandler::initProfileEventsBlockOutput(const Block & block) *out, client_tcp_protocol_version, block.cloneEmpty(), + std::nullopt, !query_settings.low_cardinality_allow_in_native_format); } } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 8791668cd89..e556bda2561 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -134,7 +134,7 @@ DistributedSink::DistributedSink( } -void DistributedSink::consume(Chunk & chunk) +void DistributedSink::consume(Chunk chunk) { if (is_first_chunk) { @@ -142,7 +142,7 @@ void DistributedSink::consume(Chunk & chunk) is_first_chunk = false; } - auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); if (insert_sync) writeSync(ordinary_block); @@ -420,13 +420,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp( - copy_query_ast, - job.local_context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -721,13 +715,7 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp( - query_ast, - context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interp(query_ast, context, allow_materialized); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index 5b7396f2c6f..a4c95633595 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -49,7 +49,7 @@ public: const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 0f9bd8b6ff9..abd4b4ce23b 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,14 +740,7 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter( - insert, - new_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); - + InterpreterInsertQuery interpreter(insert, new_context, false, true, true); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 809401bb279..f5c5d093ce1 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1099,13 +1099,7 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - kafka_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 9803fa0a160..792133ced64 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -71,9 +71,9 @@ public: new_hash.reset(); } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 82759e8a851..57a1ea302f9 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,7 +21,6 @@ limitations under the License. */ #include #include #include -#include #include #include #include @@ -331,7 +330,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) +void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -408,21 +407,6 @@ void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Ch builder = interpreter.buildQueryPipeline(); } - builder.addSimpleTransform([&](const Block & cur_header) - { - return std::make_shared(chunk_infos.clone(), cur_header); - }); - - String live_view_id = live_view.getStorageID().hasUUID() ? toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted(); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(live_view_id, stream_header); - }); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header); - }); - builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 12d8e898347..91daac32c7b 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); + void writeBlock(const Block & block, ContextPtr context); void refresh(); diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index ff5214a5e51..bc8cb0ce69a 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,13 +377,7 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr +#include namespace DB { @@ -71,9 +72,21 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( Columns IMergeTreeDataPartWriter::releaseIndexColumns() { - return Columns( - std::make_move_iterator(index_columns.begin()), - std::make_move_iterator(index_columns.end())); + /// The memory for index was allocated without thread memory tracker. + /// We need to deallocate it in shrinkToFit without memory tracker as well. + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + + Columns result; + result.reserve(index_columns.size()); + + for (auto & column : index_columns) + { + column->shrinkToFit(); + result.push_back(std::move(column)); + } + + index_columns.clear(); + return result; } SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7e4b1db4c89..d38001a0feb 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -874,46 +874,6 @@ static Field applyFunctionForField( return (*col)[0]; } -/// The case when arguments may have types different than in the primary key. -static std::pair applyFunctionForFieldOfUnknownType( - const FunctionBasePtr & func, - const DataTypePtr & arg_type, - const Field & arg_value) -{ - ColumnsWithTypeAndName arguments{{ arg_type->createColumnConst(1, arg_value), arg_type, "x" }}; - DataTypePtr return_type = func->getResultType(); - - auto col = func->execute(arguments, return_type, 1); - - Field result = (*col)[0]; - - return {std::move(result), std::move(return_type)}; -} - - -/// Same as above but for binary operators -static std::pair applyBinaryFunctionForFieldOfUnknownType( - const FunctionOverloadResolverPtr & func, - const DataTypePtr & arg_type, - const Field & arg_value, - const DataTypePtr & arg_type2, - const Field & arg_value2) -{ - ColumnsWithTypeAndName arguments{ - {arg_type->createColumnConst(1, arg_value), arg_type, "x"}, {arg_type2->createColumnConst(1, arg_value2), arg_type2, "y"}}; - - FunctionBasePtr func_base = func->build(arguments); - - DataTypePtr return_type = func_base->getResultType(); - - auto col = func_base->execute(arguments, return_type, 1); - - Field result = (*col)[0]; - - return {std::move(result), std::move(return_type)}; -} - - static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) { /// Fallback for fields without block reference. @@ -940,164 +900,92 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & return {field.columns, field.row_idx, result_idx}; } -/** When table's key has expression with these functions from a column, - * and when a column in a query is compared with a constant, such as: - * CREATE TABLE (x String) ORDER BY toDate(x) - * SELECT ... WHERE x LIKE 'Hello%' - * we want to apply the function to the constant for index analysis, - * but should modify it to pass on un-parsable values. - */ -static std::set date_time_parsing_functions = { - "toDate", - "toDate32", - "toDateTime", - "toDateTime64", - "parseDateTimeBestEffort", - "parseDateTimeBestEffortUS", - "parseDateTime32BestEffort", - "parseDateTime64BestEffort", - "parseDateTime", - "parseDateTimeInJodaSyntax", -}; - -/** The key functional expression constraint may be inferred from a plain column in the expression. - * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, - * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` - * condition also holds, so the index may be used to select only parts satisfying this condition. - * - * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the - * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). - * Instead, we can qualify only functions that do not transform the range (for example rounding), - * which while not strictly monotonic, are monotonic everywhere on the input range. - */ -bool KeyCondition::transformConstantWithValidFunctions( - ContextPtr context, - const String & expr_name, - size_t & out_key_column_num, - DataTypePtr & out_key_column_type, - Field & out_value, - DataTypePtr & out_type, - std::function always_monotonic) const +/// Sequentially applies functions to the column, returns `true` +/// if all function arguments are compatible with functions +/// signatures, and none of the functions produce `NULL` output. +/// +/// After functions chain execution, fills result column and its type. +bool applyFunctionChainToColumn( + const ColumnPtr & in_column, + const DataTypePtr & in_data_type, + const std::vector & functions, + ColumnPtr & out_column, + DataTypePtr & out_data_type) { - const auto & sample_block = key_expr->getSampleBlock(); + // Remove LowCardinality from input column, and convert it to regular one + auto result_column = in_column->convertToFullIfNeeded(); + auto result_type = removeLowCardinality(in_data_type); - for (const auto & node : key_expr->getNodes()) + // In case function sequence is empty, return full non-LowCardinality column + if (functions.empty()) { - auto it = key_columns.find(node.result_name); - if (it != key_columns.end()) - { - std::stack chain; - - const auto * cur_node = &node; - bool is_valid_chain = true; - - while (is_valid_chain) - { - if (cur_node->result_name == expr_name) - break; - - chain.push(cur_node); - - if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) - { - is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type); - - const ActionsDAG::Node * next_node = nullptr; - for (const auto * arg : cur_node->children) - { - if (arg->column && isColumnConst(*arg->column)) - continue; - - if (next_node) - is_valid_chain = false; - - next_node = arg; - } - - if (!next_node) - is_valid_chain = false; - - cur_node = next_node; - } - else if (cur_node->type == ActionsDAG::ActionType::ALIAS) - cur_node = cur_node->children.front(); - else - is_valid_chain = false; - } - - if (is_valid_chain) - { - out_type = removeLowCardinality(out_type); - auto const_type = removeLowCardinality(cur_node->result_type); - auto const_column = out_type->createColumnConst(1, out_value); - auto const_value = (*castColumnAccurateOrNull({const_column, out_type, ""}, const_type))[0]; - - if (const_value.isNull()) - return false; - - while (!chain.empty()) - { - const auto * func = chain.top(); - chain.pop(); - - if (func->type != ActionsDAG::ActionType::FUNCTION) - continue; - - const auto & func_name = func->function_base->getName(); - auto func_base = func->function_base; - const auto & arg_types = func_base->getArgumentTypes(); - if (date_time_parsing_functions.contains(func_name) && !arg_types.empty() && isStringOrFixedString(arg_types[0])) - { - auto func_or_null = FunctionFactory::instance().get(func_name + "OrNull", context); - ColumnsWithTypeAndName arguments; - int i = 0; - for (const auto & type : func->function_base->getArgumentTypes()) - arguments.push_back({nullptr, type, fmt::format("_{}", i++)}); - - func_base = func_or_null->build(arguments); - } - - if (func->children.size() == 1) - { - std::tie(const_value, const_type) - = applyFunctionForFieldOfUnknownType(func_base, const_type, const_value); - } - else if (func->children.size() == 2) - { - const auto * left = func->children[0]; - const auto * right = func->children[1]; - if (left->column && isColumnConst(*left->column)) - { - auto left_arg_type = left->result_type; - auto left_arg_value = (*left->column)[0]; - std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - FunctionFactory::instance().get(func_base->getName(), context), - left_arg_type, left_arg_value, const_type, const_value); - } - else - { - auto right_arg_type = right->result_type; - auto right_arg_value = (*right->column)[0]; - std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - FunctionFactory::instance().get(func_base->getName(), context), - const_type, const_value, right_arg_type, right_arg_value); - } - } - - if (const_value.isNull()) - return false; - } - - out_key_column_num = it->second; - out_key_column_type = sample_block.getByName(it->first).type; - out_value = const_value; - out_type = const_type; - return true; - } - } + out_column = result_column; + out_data_type = result_type; + return true; } - return false; + // If first function arguments are empty, cannot transform input column + if (functions[0]->getArgumentTypes().empty()) + { + return false; + } + + // And cast it to the argument type of the first function in the chain + auto in_argument_type = functions[0]->getArgumentTypes()[0]; + if (canBeSafelyCasted(result_type, in_argument_type)) + { + result_column = castColumnAccurate({result_column, result_type, ""}, in_argument_type); + result_type = in_argument_type; + } + // If column cannot be casted accurate, casting with OrNull, and in case all + // values has been casted (no nulls), unpacking nested column from nullable. + // In case any further functions require Nullable input, they'll be able + // to cast it. + else + { + result_column = castColumnAccurateOrNull({result_column, result_type, ""}, in_argument_type); + const auto & result_column_nullable = assert_cast(*result_column); + const auto & null_map_data = result_column_nullable.getNullMapData(); + for (char8_t i : null_map_data) + { + if (i != 0) + return false; + } + result_column = result_column_nullable.getNestedColumnPtr(); + result_type = removeNullable(in_argument_type); + } + + for (const auto & func : functions) + { + if (func->getArgumentTypes().empty()) + return false; + + auto argument_type = func->getArgumentTypes()[0]; + if (!canBeSafelyCasted(result_type, argument_type)) + return false; + + result_column = castColumnAccurate({result_column, result_type, ""}, argument_type); + result_column = func->execute({{result_column, argument_type, ""}}, func->getResultType(), result_column->size()); + result_type = func->getResultType(); + + // Transforming nullable columns to the nested ones, in case no nulls found + if (result_column->isNullable()) + { + const auto & result_column_nullable = assert_cast(*result_column); + const auto & null_map_data = result_column_nullable.getNullMapData(); + for (char8_t i : null_map_data) + { + if (i != 0) + return false; + } + result_column = result_column_nullable.getNestedColumnPtr(); + result_type = removeNullable(func->getResultType()); + } + } + out_column = result_column; + out_data_type = result_type; + + return true; } bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( @@ -1118,13 +1006,13 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( if (out_value.isNull()) return false; - return transformConstantWithValidFunctions( + MonotonicFunctionsChain transform_functions; + auto can_transform_constant = extractMonotonicFunctionsChainFromKey( node.getTreeContext().getQueryContext(), expr_name, out_key_column_num, out_key_column_type, - out_value, - out_type, + transform_functions, [](const IFunctionBase & func, const IDataType & type) { if (!func.hasInformationAboutMonotonicity()) @@ -1138,6 +1026,27 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( } return true; }); + + if (!can_transform_constant) + return false; + + auto const_column = out_type->createColumnConst(1, out_value); + + ColumnPtr transformed_const_column; + DataTypePtr transformed_const_type; + bool constant_transformed = applyFunctionChainToColumn( + const_column, + out_type, + transform_functions, + transformed_const_column, + transformed_const_type); + + if (!constant_transformed) + return false; + + out_value = (*transformed_const_column)[0]; + out_type = transformed_const_type; + return true; } /// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)` @@ -1173,28 +1082,48 @@ bool KeyCondition::canConstantBeWrappedByFunctions( if (out_value.isNull()) return false; - return transformConstantWithValidFunctions( + MonotonicFunctionsChain transform_functions; + auto can_transform_constant = extractMonotonicFunctionsChainFromKey( node.getTreeContext().getQueryContext(), expr_name, out_key_column_num, out_key_column_type, - out_value, + transform_functions, + [](const IFunctionBase & func, const IDataType &) { return func.isDeterministic(); }); + + if (!can_transform_constant) + return false; + + auto const_column = out_type->createColumnConst(1, out_value); + + ColumnPtr transformed_const_column; + DataTypePtr transformed_const_type; + bool constant_transformed = applyFunctionChainToColumn( + const_column, out_type, - [](const IFunctionBase & func, const IDataType &) - { - return func.isDeterministic(); - }); + transform_functions, + transformed_const_column, + transformed_const_type); + + if (!constant_transformed) + return false; + + out_value = (*transformed_const_column)[0]; + out_type = transformed_const_type; + return true; } bool KeyCondition::tryPrepareSetIndex( const RPNBuilderFunctionTreeNode & func, RPNElement & out, - size_t & out_key_column_num) + size_t & out_key_column_num, + bool & is_constant_transformed) { const auto & left_arg = func.getArgumentAt(0); out_key_column_num = 0; std::vector indexes_mapping; + std::vector set_transforming_chains; DataTypes data_types; auto get_key_tuple_position_mapping = [&](const RPNBuilderTreeNode & node, size_t tuple_index) @@ -1203,6 +1132,7 @@ bool KeyCondition::tryPrepareSetIndex( index_mapping.tuple_index = tuple_index; DataTypePtr data_type; std::optional key_space_filling_curve_argument_pos; + MonotonicFunctionsChain set_transforming_chain; if (isKeyPossiblyWrappedByMonotonicFunctions( node, index_mapping.key_index, key_space_filling_curve_argument_pos, data_type, index_mapping.functions) && !key_space_filling_curve_argument_pos) /// We don't support the analysis of space-filling curves and IN set. @@ -1210,6 +1140,15 @@ bool KeyCondition::tryPrepareSetIndex( indexes_mapping.push_back(index_mapping); data_types.push_back(data_type); out_key_column_num = std::max(out_key_column_num, index_mapping.key_index); + set_transforming_chains.push_back(set_transforming_chain); + } + // For partition index, checking if set can be transformed to prune any partitions + else if (single_point && canSetValuesBeWrappedByFunctions(node, index_mapping.key_index, data_type, set_transforming_chain)) + { + indexes_mapping.push_back(index_mapping); + data_types.push_back(data_type); + out_key_column_num = std::max(out_key_column_num, index_mapping.key_index); + set_transforming_chains.push_back(set_transforming_chain); } }; @@ -1275,6 +1214,23 @@ bool KeyCondition::tryPrepareSetIndex( auto set_element_type = set_types[set_element_index]; auto set_column = set_columns[set_element_index]; + if (!set_transforming_chains[indexes_mapping_index].empty()) + { + ColumnPtr transformed_set_column; + DataTypePtr transformed_set_type; + if (!applyFunctionChainToColumn( + set_column, + set_element_type, + set_transforming_chains[indexes_mapping_index], + transformed_set_column, + transformed_set_type)) + return false; + + set_column = transformed_set_column; + set_element_type = transformed_set_type; + is_constant_transformed = true; + } + if (canBeSafelyCasted(set_element_type, key_column_type)) { set_columns[set_element_index] = castColumn({set_column, set_element_type, {}}, key_column_type); @@ -1571,6 +1527,191 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( return false; } +/** When table's key has expression with these functions from a column, + * and when a column in a query is compared with a constant, such as: + * CREATE TABLE (x String) ORDER BY toDate(x) + * SELECT ... WHERE x LIKE 'Hello%' + * we want to apply the function to the constant for index analysis, + * but should modify it to pass on un-parsable values. + */ +static std::set date_time_parsing_functions = { + "toDate", + "toDate32", + "toDateTime", + "toDateTime64", + "parseDateTimeBestEffort", + "parseDateTimeBestEffortUS", + "parseDateTime32BestEffort", + "parseDateTime64BestEffort", + "parseDateTime", + "parseDateTimeInJodaSyntax", +}; + +/** The key functional expression constraint may be inferred from a plain column in the expression. + * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, + * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` + * condition also holds, so the index may be used to select only parts satisfying this condition. + * + * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the + * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). + * Instead, we can qualify only functions that do not transform the range (for example rounding), + * which while not strictly monotonic, are monotonic everywhere on the input range. + */ +bool KeyCondition::extractMonotonicFunctionsChainFromKey( + ContextPtr context, + const String & expr_name, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, + MonotonicFunctionsChain & out_functions_chain, + std::function always_monotonic) const +{ + const auto & sample_block = key_expr->getSampleBlock(); + + for (const auto & node : key_expr->getNodes()) + { + auto it = key_columns.find(node.result_name); + if (it != key_columns.end()) + { + std::stack chain; + + const auto * cur_node = &node; + bool is_valid_chain = true; + + while (is_valid_chain) + { + if (cur_node->result_name == expr_name) + break; + + chain.push(cur_node); + + if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) + { + is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type); + + const ActionsDAG::Node * next_node = nullptr; + for (const auto * arg : cur_node->children) + { + if (arg->column && isColumnConst(*arg->column)) + continue; + + if (next_node) + is_valid_chain = false; + + next_node = arg; + } + + if (!next_node) + is_valid_chain = false; + + cur_node = next_node; + } + else if (cur_node->type == ActionsDAG::ActionType::ALIAS) + cur_node = cur_node->children.front(); + else + is_valid_chain = false; + } + + if (is_valid_chain) + { + while (!chain.empty()) + { + const auto * func = chain.top(); + chain.pop(); + + if (func->type != ActionsDAG::ActionType::FUNCTION) + continue; + + auto func_name = func->function_base->getName(); + auto func_base = func->function_base; + + ColumnsWithTypeAndName arguments; + ColumnWithTypeAndName const_arg; + FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST; + + if (date_time_parsing_functions.contains(func_name)) + { + const auto & arg_types = func_base->getArgumentTypes(); + if (!arg_types.empty() && isStringOrFixedString(arg_types[0])) + { + func_name = func_name + "OrNull"; + } + + } + + auto func_builder = FunctionFactory::instance().tryGet(func_name, context); + + if (func->children.size() == 1) + { + arguments.push_back({nullptr, removeLowCardinality(func->children[0]->result_type), ""}); + } + else if (func->children.size() == 2) + { + const auto * left = func->children[0]; + const auto * right = func->children[1]; + if (left->column && isColumnConst(*left->column)) + { + const_arg = {left->result_type->createColumnConst(0, (*left->column)[0]), left->result_type, ""}; + arguments.push_back(const_arg); + arguments.push_back({nullptr, removeLowCardinality(right->result_type), ""}); + kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST; + } + else + { + const_arg = {right->result_type->createColumnConst(0, (*right->column)[0]), right->result_type, ""}; + arguments.push_back({nullptr, removeLowCardinality(left->result_type), ""}); + arguments.push_back(const_arg); + kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST; + } + } + + auto out_func = func_builder->build(arguments); + if (kind == FunctionWithOptionalConstArg::Kind::NO_CONST) + out_functions_chain.push_back(out_func); + else + out_functions_chain.push_back(std::make_shared(out_func, const_arg, kind)); + } + + out_key_column_num = it->second; + out_key_column_type = sample_block.getByName(it->first).type; + return true; + } + } + } + + return false; +} + +bool KeyCondition::canSetValuesBeWrappedByFunctions( + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, + MonotonicFunctionsChain & out_functions_chain) +{ + // Checking if column name matches any of key subexpressions + String expr_name = node.getColumnName(); + + if (array_joined_column_names.contains(expr_name)) + return false; + + if (!key_subexpr_names.contains(expr_name)) + { + expr_name = node.getColumnNameWithModuloLegacy(); + + if (!key_subexpr_names.contains(expr_name)) + return false; + } + + return extractMonotonicFunctionsChainFromKey( + node.getTreeContext().getQueryContext(), + expr_name, + out_key_column_num, + out_key_res_column_type, + out_functions_chain, + [](const IFunctionBase & func, const IDataType &) + { + return func.isDeterministic(); + }); +} static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const String & node_column_name) { @@ -1649,7 +1790,7 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme if (functionIsInOrGlobalInOperator(func_name)) { - if (tryPrepareSetIndex(func, out, key_column_num)) + if (tryPrepareSetIndex(func, out, key_column_num, is_constant_transformed)) { key_arg_pos = 0; is_set_const = true; diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 6e5956706aa..9e2218d7a29 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -14,6 +14,7 @@ #include #include +#include "DataTypes/Serializations/ISerialization.h" namespace DB @@ -253,13 +254,12 @@ private: DataTypePtr & out_key_column_type, std::vector & out_functions_chain); - bool transformConstantWithValidFunctions( + bool extractMonotonicFunctionsChainFromKey( ContextPtr context, const String & expr_name, size_t & out_key_column_num, DataTypePtr & out_key_column_type, - Field & out_value, - DataTypePtr & out_type, + MonotonicFunctionsChain & out_functions_chain, std::function always_monotonic) const; bool canConstantBeWrappedByMonotonicFunctions( @@ -276,13 +276,25 @@ private: Field & out_value, DataTypePtr & out_type); + /// Checks if node is a subexpression of any of key columns expressions, + /// wrapped by deterministic functions, and if so, returns `true`, and + /// specifies key column position / type. Besides that it produces the + /// chain of functions which should be executed on set, to transform it + /// into key column values. + bool canSetValuesBeWrappedByFunctions( + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, + MonotonicFunctionsChain & out_functions_chain); + /// If it's possible to make an RPNElement /// that will filter values (possibly tuples) by the content of 'prepared_set', /// do it and return true. bool tryPrepareSetIndex( const RPNBuilderFunctionTreeNode & func, RPNElement & out, - size_t & out_key_column_num); + size_t & out_key_column_num, + bool & is_constant_transformed); /// Checks that the index can not be used. /// diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 909a8a48bda..e31f6db5409 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -87,6 +87,7 @@ #include #include +#include #include #include @@ -2464,7 +2465,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) } /// Grab only parts that are not used by anyone (SELECTs for example). - if (!part.unique()) + if (!isSharedPtrUnique(part)) { part->removal_state.store(DataPartRemovalState::NON_UNIQUE_OWNERSHIP, std::memory_order_relaxed); skipped_parts.push_back(part->info); @@ -4360,13 +4361,13 @@ bool MergeTreeData::tryRemovePartImmediately(DataPartPtr && part) part.reset(); - if (!((*it)->getState() == DataPartState::Outdated && it->unique())) + if (!((*it)->getState() == DataPartState::Outdated && isSharedPtrUnique(*it))) { if ((*it)->getState() != DataPartState::Outdated) LOG_WARNING(log, "Cannot immediately remove part {} because it's not in Outdated state " "usage counter {}", part_name_with_state, it->use_count()); - if (!it->unique()) + if (!isSharedPtrUnique(*it)) LOG_WARNING(log, "Cannot immediately remove part {} because someone using it right now " "usage counter {}", part_name_with_state, it->use_count()); return false; @@ -4432,7 +4433,7 @@ size_t MergeTreeData::getNumberOfOutdatedPartsWithExpiredRemovalTime() const for (const auto & part : outdated_parts_range) { auto part_remove_time = part->remove_time.load(std::memory_order_relaxed); - if (part_remove_time <= time_now && time_now - part_remove_time >= getSettings()->old_parts_lifetime.totalSeconds() && part.unique()) + if (part_remove_time <= time_now && time_now - part_remove_time >= getSettings()->old_parts_lifetime.totalSeconds() && isSharedPtrUnique(part)) ++res; } @@ -7077,6 +7078,20 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( /// with new analyzer, Planner make decision regarding parallel replicas usage, and so about processing stage on reading if (!query_context->getSettingsRef().allow_experimental_analyzer) { + const auto & settings = query_context->getSettingsRef(); + if (query_context->canUseParallelReplicasCustomKey()) + { + if (query_context->getClientInfo().distributed_depth > 0) + return QueryProcessingStage::FetchColumns; + + if (!supportsReplication() && !settings.parallel_replicas_for_non_replicated_merge_tree) + return QueryProcessingStage::Enum::FetchColumns; + + if (to_stage >= QueryProcessingStage::WithMergeableState + && query_context->canUseParallelReplicasCustomKeyForCluster(*query_context->getClusterForParallelReplicas())) + return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + } + if (query_context->getClientInfo().collaborate_with_initiator) return QueryProcessingStage::Enum::FetchColumns; @@ -7088,7 +7103,7 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( return QueryProcessingStage::Enum::WithMergeableState; /// For non-replicated MergeTree we allow them only if parallel_replicas_for_non_replicated_merge_tree is enabled - if (query_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) + if (settings.parallel_replicas_for_non_replicated_merge_tree) return QueryProcessingStage::Enum::WithMergeableState; } } @@ -8626,7 +8641,7 @@ size_t MergeTreeData::unloadPrimaryKeysOfOutdatedParts() /// Outdated part may be hold by SELECT query and still needs the index. /// This check requires lock of index_mutex but if outdated part is unique then there is no /// contention on it, so it's relatively cheap and it's ok to check under a global parts lock. - if (part.unique() && part->isIndexLoaded()) + if (isSharedPtrUnique(part) && part->isIndexLoaded()) parts_to_unload_index.push_back(part); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 21d046c76f2..52d12c9db7d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -154,7 +154,8 @@ void writeColumnSingleGranule( const SerializationPtr & serialization, ISerialization::OutputStreamGetter stream_getter, size_t from_row, - size_t number_of_rows) + size_t number_of_rows, + const MergeTreeWriterSettings & settings) { ISerialization::SerializeBinaryBulkStatePtr state; ISerialization::SerializeBinaryBulkSettings serialize_settings; @@ -162,6 +163,7 @@ void writeColumnSingleGranule( serialize_settings.getter = stream_getter; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX; serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); @@ -259,7 +261,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeColumnSingleGranule( block.getByName(name_and_type->name), getSerialization(name_and_type->name), - stream_getter, granule.start_row, granule.rows_to_write); + stream_getter, granule.start_row, granule.rows_to_write, settings); /// Each type always have at least one substream prev_stream->hashing_buf.next(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index a576720294f..5c9191dbb54 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -254,6 +254,12 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); } + + const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types; + index_serializations.reserve(primary_key_types.size()); + + for (const auto & type : primary_key_types) + index_serializations.push_back(type->getDefaultSerialization()); } } @@ -299,22 +305,33 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } + skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); skip_index_accumulated_marks.push_back(0); } } +void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row) +{ + chassert(index_block.columns() == index_serializations.size()); + auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream; + + for (size_t i = 0; i < index_block.columns(); ++i) + { + const auto & column = index_block.getByPosition(i).column; + + index_columns[i]->insertFrom(*column, row); + index_serializations[i]->serializeBinary(*column, row, index_stream, {}); + } +} + void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write) { - size_t primary_columns_num = primary_index_block.columns(); + if (!metadata_snapshot->hasPrimaryKey()) + return; + if (index_columns.empty()) - { - index_types = primary_index_block.getDataTypes(); - index_columns.resize(primary_columns_num); - last_block_index_columns.resize(primary_columns_num); - for (size_t i = 0; i < primary_columns_num; ++i) - index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); - } + index_columns = primary_index_block.cloneEmptyColumns(); { /** While filling index (index_columns), disable memory tracker. @@ -328,22 +345,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc /// Write index. The index contains Primary Key value for each `index_granularity` row. for (const auto & granule : granules_to_write) { - if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) - { - for (size_t j = 0; j < primary_columns_num; ++j) - { - const auto & primary_column = primary_index_block.getByPosition(j); - index_columns[j]->insertFrom(*primary_column.column, granule.start_row); - primary_column.type->getDefaultSerialization()->serializeBinary( - *primary_column.column, granule.start_row, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); - } - } + if (granule.mark_on_start) + calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row); } } - /// store last index row to write final mark at the end of column - for (size_t j = 0; j < primary_columns_num; ++j) - last_block_index_columns[j] = primary_index_block.getByPosition(j).column; + /// Store block with last index row to write final mark at the end of column + if (with_final_mark) + last_index_block = primary_index_block; } void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) @@ -420,17 +429,11 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat if (index_file_hashing_stream) { - if (write_final_mark) + if (write_final_mark && last_index_block) { - for (size_t j = 0; j < index_columns.size(); ++j) - { - const auto & column = *last_block_index_columns[j]; - size_t last_row_number = column.size() - 1; - index_columns[j]->insertFrom(column, last_row_number); - index_types[j]->getDefaultSerialization()->serializeBinary( - column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); - } - last_block_index_columns.clear(); + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1); + last_index_block.clear(); } if (compress_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index bdf0fdb7f32..8d84442981e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -173,10 +173,10 @@ protected: std::unique_ptr index_source_hashing_stream; bool compress_primary_key; - DataTypes index_types; - /// Index columns from the last block - /// It's written to index file in the `writeSuffixAndFinalizePart` method - Columns last_block_index_columns; + /// Last block with index columns. + /// It's written to index file in the `writeSuffixAndFinalizePart` method. + Block last_index_block; + Serializations index_serializations; bool data_written = false; @@ -193,6 +193,7 @@ private: void initStatistics(); virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; + void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row); struct ExecutionStatistics { diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index a69d21de8e7..74ea89a8864 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -433,6 +433,7 @@ void MergeTreeDataPartWriterWide::writeColumn( if (inserted) { ISerialization::SerializeBinaryBulkSettings serialize_settings; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } @@ -441,6 +442,7 @@ void MergeTreeDataPartWriterWide::writeColumn( serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; for (const auto & granule : granules) { @@ -630,6 +632,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums & ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index a9125b4047e..04171656fcf 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -76,6 +76,7 @@ struct MergeTreeWriterSettings , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) + , use_compact_variant_discriminators_serialization(storage_settings->use_compact_variant_discriminators_serialization) { } @@ -98,6 +99,7 @@ struct MergeTreeWriterSettings size_t low_cardinality_max_dictionary_size; bool low_cardinality_use_single_dictionary_for_part; + bool use_compact_variant_discriminators_serialization; }; } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index a6298aab3d9..78b67de1a7e 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -145,12 +145,8 @@ ChunkAndProgress MergeTreeSelectProcessor::read() ordered_columns.push_back(res.block.getByName(name).column); } - auto chunk = Chunk(ordered_columns, res.row_count); - if (add_part_level) - chunk.getChunkInfos().add(std::make_shared(task->getInfo().data_part->info.level)); - return ChunkAndProgress{ - .chunk = std::move(chunk), + .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? std::make_shared(task->getInfo().data_part->info.level) : nullptr), .num_read_rows = res.num_read_rows, .num_read_bytes = res.num_read_bytes, .is_finished = false}; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 4f90f7131da..02f8d6f4f6a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -264,10 +264,7 @@ try ++it; } - auto result = Chunk(std::move(res_columns), rows_read); - if (add_part_level) - result.getChunkInfos().add(std::make_shared(data_part->info.level)); - return result; + return Chunk(std::move(res_columns), rows_read, add_part_level ? std::make_shared(data_part->info.level) : nullptr); } } else diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 1f8d6abebd2..c0afd781c7e 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,6 +43,7 @@ struct Settings; M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ + M(Bool, use_compact_variant_discriminators_serialization, true, "Use compact version of Variant discriminators serialization.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index d8cfce1ca99..05751e0fa6f 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,27 +1,14 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include - -#include +#include +#include +#include namespace ProfileEvents { extern const Event DuplicatedInsertedBlocks; } -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace DB { @@ -71,12 +58,12 @@ void MergeTreeSink::onCancel() { } -void MergeTreeSink::consume(Chunk & chunk) +void MergeTreeSink::consume(Chunk chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -89,18 +76,6 @@ void MergeTreeSink::consume(Chunk & chunk) size_t streams = 0; bool support_parallel_write = false; - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); - - const bool need_to_define_dedup_token = !token_info->isDefined(); - - String block_dedup_token; - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - for (auto & current_block : part_blocks) { ProfileEvents::Counters part_counters; @@ -125,16 +100,22 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (need_to_define_dedup_token) - { - chassert(temp_part.part); - const auto hash_value = temp_part.part->getPartBlockIDHash(); - token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); - } - if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; + String block_dedup_token; + if (storage.getDeduplicationLog()) + { + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + } + size_t max_insert_delayed_streams_for_parallel_write; if (settings.max_insert_delayed_streams_for_parallel_write.changed) @@ -146,7 +127,6 @@ void MergeTreeSink::consume(Chunk & chunk) /// In case of too much columns/parts in block, flush explicitly. streams += temp_part.streams.size(); - if (streams > max_insert_delayed_streams_for_parallel_write) { finishDelayedChunk(); @@ -163,16 +143,11 @@ void MergeTreeSink::consume(Chunk & chunk) { .temp_part = std::move(temp_part), .elapsed_ns = elapsed_ns, - .block_dedup_token = block_dedup_token, + .block_dedup_token = std::move(block_dedup_token), .part_counters = std::move(part_counters), }); } - if (need_to_define_dedup_token) - { - token_info->finishChunkHashes(); - } - finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); @@ -185,8 +160,6 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; - const Settings & settings = context->getSettingsRef(); - for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -205,8 +178,7 @@ void MergeTreeSink::finishDelayedChunk() storage.fillNewPartName(part, lock); auto * deduplication_log = storage.getDeduplicationLog(); - - if (settings.insert_deduplicate && deduplication_log) + if (deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); auto res = deduplication_log->addPart(block_id, part->info); diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 90976020d52..cf6715a3415 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -25,7 +25,7 @@ public: ~MergeTreeSink() override; String getName() const override { return "MergeTreeSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onStart() override; void onFinish() override; void onCancel() override; @@ -36,6 +36,7 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; + UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk and start writing a new one. diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 164658c914e..9d696b70d9f 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -93,6 +93,7 @@ struct MergedBlockOutputStream::Finalizer::Impl void MergedBlockOutputStream::Finalizer::finish() { std::unique_ptr to_finish = std::move(impl); + impl.reset(); if (to_finish) to_finish->finish(); } @@ -130,7 +131,19 @@ MergedBlockOutputStream::Finalizer::Finalizer(Finalizer &&) noexcept = default; MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operator=(Finalizer &&) noexcept = default; MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr impl_) : impl(std::move(impl_)) {} -MergedBlockOutputStream::Finalizer::~Finalizer() = default; +MergedBlockOutputStream::Finalizer::~Finalizer() +{ + try + { + if (impl) + finish(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + void MergedBlockOutputStream::finalizePart( const MergeTreeMutableDataPartPtr & new_part, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3dbcb5e5bda..a552ee89aee 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1297,7 +1297,6 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; - Block projection_header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1315,12 +1314,14 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); + projection_squashes[i].header = block_to_squash; + Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); - if (squashed_chunk) + if (planned_chunk.hasChunkInfo()) { - auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1341,10 +1342,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto squashed_chunk = Squashing::squash(projection_squash_plan.flush()); - if (squashed_chunk) + auto planned_chunk = projection_squash_plan.flush(); + if (planned_chunk.hasChunkInfo()) { - auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 5a84c6fd684..f46b4de10b7 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -444,6 +444,9 @@ void DefaultCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRangesAn ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", replica_num, stats.size()); ++stats[replica_num].number_of_requests; + + if (replica_status[replica_num].is_announcement_received) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate announcement received for replica number {}", replica_num); replica_status[replica_num].is_announcement_received = true; LOG_DEBUG(log, "Sent initial requests: {} Replicas count: {}", sent_initial_requests, replicas_count); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index bbae054fbed..4b4f4c33e7d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,25 +1,21 @@ +#include +#include +#include +#include +#include #include "Common/Exception.h" #include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include - +#include +#include +#include +#include #include -#include - namespace ProfileEvents { @@ -257,12 +253,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const } template -void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) +void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); const auto & settings = context->getSettingsRef(); @@ -288,25 +284,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if constexpr (async_insert) { - const auto async_insert_info_ptr = chunk.getChunkInfos().get(); - if (async_insert_info_ptr) + const auto & chunk_info = chunk.getChunkInfo(); + if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - String block_dedup_token; - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); - - const bool need_to_define_dedup_token = !token_info->isDefined(); - - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; @@ -358,10 +342,23 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { + if (deduplicate) { + String block_dedup_token; + /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. + + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); } @@ -369,13 +366,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - - if (need_to_define_dedup_token) - { - chassert(temp_part.part); - const auto hash_value = temp_part.part->getPartBlockIDHash(); - token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); - } } profile_events_scope.reset(); @@ -421,15 +411,17 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (need_to_define_dedup_token) - { - token_info->finishChunkHashes(); - } - finishDelayedChunk(zookeeper); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); + /// If deduplicated data should not be inserted into MV, we need to set proper + /// value for `last_block_is_duplicate`, which is possible only after the part is committed. + /// Othervide we can delay commit. + /// TODO: we can also delay commit if there is no MVs. + if (!settings.deduplicate_blocks_in_dependent_materialized_views) + finishDelayedChunk(zookeeper); + ++num_blocks_processed; } @@ -439,6 +431,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF if (!delayed_chunk) return; + last_block_is_duplicate = false; + for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -451,6 +445,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF { bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; + last_block_is_duplicate = last_block_is_duplicate || deduplicated; + /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); @@ -539,7 +535,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl ProfileEventsScope profile_events_scope; String original_part_dir = part->getDataPartStorage().getPartDirectory(); - auto try_rollback_part_rename = [this, &part, &original_part_dir] () + auto try_rollback_part_rename = [this, &part, &original_part_dir]() { if (original_part_dir == part->getDataPartStorage().getPartDirectory()) return; @@ -1155,16 +1151,8 @@ void ReplicatedMergeTreeSinkImpl::onStart() template void ReplicatedMergeTreeSinkImpl::onFinish() { - const auto & settings = context->getSettingsRef(); - - ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( - settings.insert_keeper_fault_injection_probability, - settings.insert_keeper_fault_injection_seed, - storage.getZooKeeper(), - "ReplicatedMergeTreeSink::onFinish", - log); - - finishDelayedChunk(zookeeper); + auto zookeeper = storage.getZooKeeper(); + finishDelayedChunk(std::make_shared(zookeeper)); } template diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 7d025361717..39623c20584 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -51,7 +51,7 @@ public: ~ReplicatedMergeTreeSinkImpl() override; void onStart() override; - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; String getName() const override { return "ReplicatedMergeTreeSink"; } @@ -59,6 +59,16 @@ public: /// For ATTACHing existing data on filesystem. bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); + /// For proper deduplication in MaterializedViews + bool lastBlockIsDuplicate() const override + { + /// If MV is responsible for deduplication, block is not considered duplicating. + if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + return false; + + return last_block_is_duplicate; + } + struct DelayedChunk; private: std::vector detectConflictsInAsyncBlockIDs(const std::vector & ids); @@ -116,6 +126,7 @@ private: bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; + bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; LoggerPtr log; diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 36899011e33..4fb81d69070 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -40,7 +40,7 @@ void MessageQueueSink::onFinish() producer->finish(); } -void MessageQueueSink::consume(Chunk & chunk) +void MessageQueueSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); if (columns.empty()) diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h index 4a9248c6c4d..b3c1e61734f 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -35,7 +35,7 @@ public: String getName() const override { return storage_name + "Sink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onStart() override; void onFinish() override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 8f0e2d76473..0b88a9e8929 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,13 +644,7 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - nats_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index bc64ef15cf1..c896a760597 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include @@ -111,7 +113,7 @@ struct DeltaLakeMetadataImpl std::set result_files; NamesAndTypesList current_schema; DataLakePartitionColumns current_partition_columns; - const auto checkpoint_version = getCheckpointIfExists(result_files); + const auto checkpoint_version = getCheckpointIfExists(result_files, current_schema, current_partition_columns); if (checkpoint_version) { @@ -205,9 +207,32 @@ struct DeltaLakeMetadataImpl Poco::Dynamic::Var json = parser.parse(json_str); Poco::JSON::Object::Ptr object = json.extract(); - // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - // object->stringify(oss); - // LOG_TEST(log, "Metadata: {}", oss.str()); + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + object->stringify(oss); + LOG_TEST(log, "Metadata: {}", oss.str()); + + if (object->has("metaData")) + { + const auto metadata_object = object->get("metaData").extract(); + const auto schema_object = metadata_object->getValue("schemaString"); + + Poco::JSON::Parser p; + Poco::Dynamic::Var fields_json = parser.parse(schema_object); + const Poco::JSON::Object::Ptr & fields_object = fields_json.extract(); + + auto current_schema = parseMetadata(fields_object); + if (file_schema.empty()) + { + file_schema = current_schema; + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } + } if (object->has("add")) { @@ -230,7 +255,12 @@ struct DeltaLakeMetadataImpl const auto value = partition_values->getValue(partition_name); auto name_and_type = file_schema.tryGetByName(partition_name); if (!name_and_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "No such column in schema: {} (schema: {})", + partition_name, file_schema.toNamesAndTypesDescription()); + } auto field = getFieldValue(value, name_and_type->type); current_partition_columns.emplace_back(*name_and_type, field); @@ -246,52 +276,35 @@ struct DeltaLakeMetadataImpl auto path = object->get("remove").extract()->getValue("path"); result.erase(fs::path(configuration->getPath()) / path); } - if (object->has("metaData")) - { - const auto metadata_object = object->get("metaData").extract(); - const auto schema_object = metadata_object->getValue("schemaString"); - - Poco::JSON::Parser p; - Poco::Dynamic::Var fields_json = parser.parse(schema_object); - Poco::JSON::Object::Ptr fields_object = fields_json.extract(); - - const auto fields = fields_object->get("fields").extract(); - NamesAndTypesList current_schema; - for (size_t i = 0; i < fields->size(); ++i) - { - const auto field = fields->getObject(static_cast(i)); - auto column_name = field->getValue("name"); - auto type = field->getValue("type"); - auto is_nullable = field->getValue("nullable"); - - std::string physical_name; - auto schema_metadata_object = field->get("metadata").extract(); - if (schema_metadata_object->has("delta.columnMapping.physicalName")) - physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); - else - physical_name = column_name; - - LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", - column_name, type, is_nullable, physical_name); - - current_schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); - } - - if (file_schema.empty()) - { - file_schema = current_schema; - } - else if (file_schema != current_schema) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Reading from files with different schema is not possible " - "({} is different from {})", - file_schema.toString(), current_schema.toString()); - } - } } } + NamesAndTypesList parseMetadata(const Poco::JSON::Object::Ptr & metadata_json) + { + NamesAndTypesList schema; + const auto fields = metadata_json->get("fields").extract(); + for (size_t i = 0; i < fields->size(); ++i) + { + const auto field = fields->getObject(static_cast(i)); + auto column_name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); + + std::string physical_name; + auto schema_metadata_object = field->get("metadata").extract(); + if (schema_metadata_object->has("delta.columnMapping.physicalName")) + physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); + else + physical_name = column_name; + + LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", + column_name, type, is_nullable, physical_name); + + schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); + } + return schema; + } + DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool is_nullable) { if (field->isObject(type_key)) @@ -505,7 +518,10 @@ struct DeltaLakeMetadataImpl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists(std::set & result) + size_t getCheckpointIfExists( + std::set & result, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns) { const auto version = readLastCheckpointIfExists(); if (!version) @@ -526,7 +542,8 @@ struct DeltaLakeMetadataImpl auto columns = ParquetSchemaReader(*buf, format_settings).readSchema(); /// Read only columns that we need. - columns.filterColumns(NameSet{"add", "remove"}); + auto filter_column_names = NameSet{"add", "metaData"}; + columns.filterColumns(filter_column_names); Block header; for (const auto & column : columns) header.insert({column.type->createColumn(), column.type, column.name}); @@ -540,9 +557,6 @@ struct DeltaLakeMetadataImpl ArrowMemoryPool::instance(), &reader)); - std::shared_ptr file_schema; - THROW_ARROW_NOT_OK(reader->GetSchema(&file_schema)); - ArrowColumnToCHColumn column_reader( header, "Parquet", format_settings.parquet.allow_missing_columns, @@ -553,29 +567,85 @@ struct DeltaLakeMetadataImpl std::shared_ptr table; THROW_ARROW_NOT_OK(reader->ReadTable(&table)); - Chunk res = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); - const auto & res_columns = res.getColumns(); + Chunk chunk = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); + auto res_block = header.cloneWithColumns(chunk.detachColumns()); + res_block = Nested::flatten(res_block); - if (res_columns.size() != 2) - { - throw Exception( - ErrorCodes::INCORRECT_DATA, - "Unexpected number of columns: {} (having: {}, expected: {})", - res_columns.size(), res.dumpStructure(), header.dumpStructure()); - } + const auto * nullable_path_column = assert_cast(res_block.getByName("add.path").column.get()); + const auto & path_column = assert_cast(nullable_path_column->getNestedColumn()); + + const auto * nullable_schema_column = assert_cast(res_block.getByName("metaData.schemaString").column.get()); + const auto & schema_column = assert_cast(nullable_schema_column->getNestedColumn()); + + auto partition_values_column_raw = res_block.getByName("add.partitionValues").column; + const auto & partition_values_column = assert_cast(*partition_values_column_raw); - const auto * tuple_column = assert_cast(res_columns[0].get()); - const auto & nullable_column = assert_cast(tuple_column->getColumn(0)); - const auto & path_column = assert_cast(nullable_column.getNestedColumn()); for (size_t i = 0; i < path_column.size(); ++i) { - const auto filename = String(path_column.getDataAt(i)); - if (filename.empty()) + const auto metadata = String(schema_column.getDataAt(i)); + if (!metadata.empty()) + { + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(metadata); + const Poco::JSON::Object::Ptr & object = json.extract(); + + auto current_schema = parseMetadata(object); + if (file_schema.empty()) + { + file_schema = current_schema; + LOG_TEST(log, "Processed schema from checkpoint: {}", file_schema.toString()); + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } + } + } + + for (size_t i = 0; i < path_column.size(); ++i) + { + const auto path = String(path_column.getDataAt(i)); + if (path.empty()) continue; - LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / filename); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + Field map; + partition_values_column.get(i, map); + auto partition_values_map = map.safeGet(); + if (!partition_values_map.empty()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & map_value : partition_values_map) + { + const auto tuple = map_value.safeGet(); + const auto partition_name = tuple[0].safeGet(); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "No such column in schema: {} (schema: {})", + partition_name, file_schema.toString()); + } + const auto value = tuple[1].safeGet(); + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(std::move(name_and_type.value()), std::move(field)); + + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } + } + } + + LOG_TEST(log, "Adding {}", path); + const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / path); if (!inserted) - throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename); + throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", path); } return version; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index f1217bc9729..c8603fccb86 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -41,6 +41,7 @@ public: auto object_storage = base_configuration->createObjectStorage(context, /* is_readonly */true); DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; + const bool use_schema_from_metadata = columns_.empty(); if (base_configuration->format == "auto") base_configuration->format = "Parquet"; @@ -50,8 +51,9 @@ public: try { metadata = DataLakeMetadata::create(object_storage, base_configuration, context); - schema_from_metadata = metadata->getTableSchema(); configuration->setPaths(metadata->getDataFiles()); + if (use_schema_from_metadata) + schema_from_metadata = metadata->getTableSchema(); } catch (...) { @@ -66,7 +68,7 @@ public: return std::make_shared>( base_configuration, std::move(metadata), configuration, object_storage, context, table_id_, - columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, + use_schema_from_metadata ? ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment_, format_settings_); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index d2bdd0af302..f2f6eac333c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -39,12 +39,12 @@ StorageObjectStorageSink::StorageObjectStorageSink( configuration->format, *write_buf, sample_block, context, format_settings_); } -void StorageObjectStorageSink::consume(Chunk & chunk) +void StorageObjectStorageSink::consume(Chunk chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageObjectStorageSink::onCancel() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 6ab531bb21a..e0081193686 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "StorageObjectStorageSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onCancel() override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 6940f10cb91..a9a7e062076 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -206,23 +206,25 @@ Chunk StorageObjectStorageSource::generate() if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) { auto partition_values = partition_columns.find(filename); - - for (const auto & [name_and_type, value] : partition_values->second) + if (partition_values != partition_columns.end()) { - if (!read_from_format_info.source_header.has(name_and_type.name)) - continue; + for (const auto & [name_and_type, value] : partition_values->second) + { + if (!read_from_format_info.source_header.has(name_and_type.name)) + continue; - const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); - auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); + const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); - /// This column is filled with default value now, remove it. - chunk.erase(column_pos); + /// This column is filled with default value now, remove it. + chunk.erase(column_pos); - /// Add correct values. - if (chunk.hasColumns()) - chunk.addColumn(column_pos, std::move(partition_column)); - else - chunk.addColumn(std::move(partition_column)); + /// Add correct values. + if (column_pos < chunk.getNumColumns()) + chunk.addColumn(column_pos, std::move(partition_column)); + else + chunk.addColumn(std::move(partition_column)); + } } } return chunk; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 14b828e7268..4388864434e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -454,13 +454,7 @@ bool StorageObjectStorageQueue::streamToViews() while (!shutdown_called && !file_iterator->isFinished()) { - InterpreterInsertQuery interpreter( - insert, - queue_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, queue_context, false, true, true); auto block_io = interpreter.execute(); auto read_from_format_info = prepareReadingFromFormat( block_io.pipeline.getHeader().getNames(), diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index ee2570756ed..09b009b26d8 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -51,7 +51,7 @@ SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key) return it->second; } -void PartitionedSink::consume(Chunk & chunk) +void PartitionedSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); @@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk & chunk) for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { auto sink = getSinkForPartitionKey(partition_key); - sink->consume(partition_index_to_chunk[partition_index]); + sink->consume(std::move(partition_index_to_chunk[partition_index])); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index fcd67556dc9..68edeb6fd73 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "PartitionedSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onException(std::exception_ptr exception) override; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 44479bd01e2..ba3cc6f58d0 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,13 +697,7 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter( - insert, - insert_context, - /* allow_materialized */ true, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, insert_context, true); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index f632e553a0d..2bb1e2dde0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,13 +437,7 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter( - insert, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, insert_context); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index f686fbda664..b9edff39b82 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -592,7 +592,8 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory) auto configuration = StoragePostgreSQL::getConfiguration(args.engine_args, args.getContext()); auto connection_info = postgres::formatConnectionString( configuration.database, configuration.host, configuration.port, - configuration.username, configuration.password); + configuration.username, configuration.password, + args.getContext()->getSettingsRef().postgresql_connection_attempt_timeout); bool has_settings = args.storage_def->settings; auto postgresql_replication_settings = std::make_unique(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index f3d2aff68c8..e4b19992151 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,13 +1129,7 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - rabbitmq_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 4b5188ca9f2..90792c59d38 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -218,7 +218,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali return {std::move(serialized_key_column), std::move(serialized_value_column)}; } -void EmbeddedRocksDBBulkSink::consume(Chunk & chunk_) +void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) { std::vector chunks_to_write = squash(std::move(chunk_)); @@ -247,10 +247,7 @@ void EmbeddedRocksDBBulkSink::onFinish() { /// If there is any data left, write it. if (!chunks.empty()) - { - Chunk empty; - consume(empty); - } + consume({}); } String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 64190c8c86f..1f548e7813d 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -32,7 +32,7 @@ public: ~EmbeddedRocksDBBulkSink() override; - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index 1f7f6939f40..c451cfd1bf5 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -29,7 +29,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( serializations = getHeader().getSerializations(); } -void EmbeddedRocksDBSink::consume(Chunk & chunk) +void EmbeddedRocksDBSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.h b/src/Storages/RocksDB/EmbeddedRocksDBSink.h index 2e1e0c7b429..011322df829 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.h @@ -17,7 +17,7 @@ public: StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; String getName() const override { return "EmbeddedRocksDBSink"; } private: diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 3473166a080..b9d3e071b6c 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -313,8 +313,7 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt Block block; while (executor.pull(block)) { - auto chunk = Chunk(block.getColumns(), block.rows()); - sink->consume(chunk); + sink->consume(Chunk{block.getColumns(), block.rows()}); } } diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index bdf69b9be15..848aeb59dad 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -166,7 +166,7 @@ struct SelectQueryInfo /// It's guaranteed to be present in JOIN TREE of `query_tree` QueryTreeNodePtr table_expression; - bool analyzer_can_use_parallel_replicas_on_follower = false; + bool current_table_chosen_for_reading_with_parallel_replicas = false; /// Table expression modifiers for storage std::optional table_expression_modifiers; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index b064fba223a..a3f6b6afc5d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -607,7 +607,7 @@ public: String getName() const override { return "BufferSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { size_t rows = chunk.getNumRows(); if (!rows) @@ -1020,13 +1020,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter( - insert, - insert_context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 67586985ce8..2cf3ced2904 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -426,7 +426,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( query_info.cluster = cluster; - if (!local_context->canUseParallelReplicasCustomKey(*cluster)) + if (!local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) { if (nodes > 1 && settings.optimize_skip_unused_shards) { @@ -839,7 +839,9 @@ void StorageDistributed::read( SelectQueryInfo modified_query_info = query_info; - if (local_context->getSettingsRef().allow_experimental_analyzer) + const auto & settings = local_context->getSettingsRef(); + + if (settings.allow_experimental_analyzer) { StorageID remote_storage_id = StorageID::createEmpty(); if (!remote_table_function_ptr) @@ -864,7 +866,7 @@ void StorageDistributed::read( header = InterpreterSelectQuery(modified_query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } - if (!local_context->getSettingsRef().allow_experimental_analyzer) + if (!settings.allow_experimental_analyzer) { modified_query_info.query = ClusterProxy::rewriteSelectQuery( local_context, modified_query_info.query, @@ -874,7 +876,7 @@ void StorageDistributed::read( /// Return directly (with correct header) if no shard to query. if (modified_query_info.getCluster()->getShardsInfo().empty()) { - if (local_context->getSettingsRef().allow_experimental_analyzer) + if (settings.allow_experimental_analyzer) return; Pipe pipe(std::make_shared(header)); @@ -893,27 +895,8 @@ void StorageDistributed::read( storage_snapshot, processed_stage); - const auto & settings = local_context->getSettingsRef(); - - ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; - if (local_context->canUseParallelReplicasCustomKey(*modified_query_info.getCluster())) - { - if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *local_context)) - { - additional_shard_filter_generator = - [my_custom_key_ast = std::move(custom_key_ast), - column_description = this->getInMemoryMetadataPtr()->columns, - custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, - custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, - custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, - context = local_context, - replica_count = modified_query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr - { - return getCustomKeyFilterForParallelReplica( - replica_count, replica_num - 1, my_custom_key_ast, {custom_key_type, custom_key_range_lower, custom_key_range_upper}, column_description, context); - }; - } - } + auto shard_filter_generator = ClusterProxy::getShardFilterGeneratorForCustomKey( + *modified_query_info.getCluster(), local_context, getInMemoryMetadataPtr()->columns); ClusterProxy::executeQuery( query_plan, @@ -928,7 +911,7 @@ void StorageDistributed::read( sharding_key_expr, sharding_key_column_name, distributed_settings, - additional_shard_filter_generator, + shard_filter_generator, /* is_remote_function= */ static_cast(owned_cluster)); /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. @@ -1050,13 +1033,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter( - new_query, - query_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(new_query, query_context); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index beb93afc972..d712bd10da4 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -167,8 +167,9 @@ void registerStorageExternalDistributed(StorageFactory & factory) current_configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); shards.insert(std::make_shared( args.table_id, std::move(pool), configuration.table, args.columns, args.constraints, String{}, context)); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 3fb397c7b81..8797f6a3dfa 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -366,12 +366,21 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user } else if (path.find_first_of("*?{") == std::string::npos) { - std::error_code error; - size_t size = fs::file_size(path, error); - if (!error) - total_bytes_to_read += size; + if (!fs::is_directory(path)) + { + std::error_code error; + size_t size = fs::file_size(path, error); + if (!error) + total_bytes_to_read += size; - paths.push_back(path); + paths.push_back(path); + } + else + { + /// We list non-directory files under that directory. + paths = listFilesWithRegexpMatching(path / fs::path("*"), total_bytes_to_read); + can_be_directory = false; + } } else { @@ -1778,12 +1787,12 @@ public: String getName() const override { return "StorageFileSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { std::lock_guard cancel_lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onCancel() override diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index c80e799a92b..20f99070000 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -119,10 +119,10 @@ public: std::string getName() const override { return "StorageKeeperMapSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -1248,10 +1248,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca Block block; while (executor.pull(block)) - { - auto chunk = Chunk(block.getColumns(), block.rows()); - sink->consume(chunk); - } + sink->consume(Chunk{block.getColumns(), block.rows()}); sink->finalize(strict); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 463694c63aa..de0324d7998 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -22,6 +21,7 @@ #include #include +#include "StorageLogSettings.h" #include #include #include @@ -341,7 +341,7 @@ public: } } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: @@ -398,9 +398,9 @@ private: }; -void LogSink::consume(Chunk & chunk) +void LogSink::consume(Chunk chunk) { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); metadata_snapshot->check(block, true); for (auto & stream : streams | boost::adaptors::map_values) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index b1bd7053c2e..57da72d06ed 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -63,7 +63,7 @@ public: String getName() const override { return "MemorySink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); storage_snapshot->metadata->check(block, true); @@ -408,7 +408,7 @@ namespace auto data_file_path = temp_dir / fs::path{file_paths[data_bin_pos]}.filename(); auto data_out_compressed = temp_disk->writeFile(data_file_path); auto data_out = std::make_unique(*data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), max_compress_block_size); - NativeWriter block_out{*data_out, 0, metadata_snapshot->getSampleBlock(), false, &index}; + NativeWriter block_out{*data_out, 0, metadata_snapshot->getSampleBlock(), std::nullopt, false, &index}; for (const auto & block : *blocks) block_out.write(block); data_out->finalize(); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 94d7a33d0dd..611289ffd78 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1,51 +1,51 @@ -#include "StorageMergeTree.h" -#include "Core/QueryProcessingStage.h" -#include "Storages/MergeTree/IMergeTreeDataPart.h" +#include #include #include -#include -#include #include +#include #include -#include "Common/Exception.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include #include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -220,24 +220,50 @@ void StorageMergeTree::read( { ClusterProxy::executeQueryWithParallelReplicas( query_plan, getStorageID(), processed_stage, query_info.query, local_context, query_info.storage_limits); + return; } - else - { - const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() - && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree - && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower); - if (auto plan = reader.read( - column_names, + if (local_context->canUseParallelReplicasCustomKey() && settings.parallel_replicas_for_non_replicated_merge_tree + && !settings.allow_experimental_analyzer && local_context->getClientInfo().distributed_depth == 0) + { + if (auto cluster = local_context->getClusterForParallelReplicas(); + local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) + { + auto modified_query_info = query_info; + modified_query_info.cluster = std::move(cluster); + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan, + getStorageID(), + std::move(modified_query_info), + getInMemoryMetadataPtr()->getColumns(), storage_snapshot, - query_info, - local_context, - max_block_size, - num_streams, - nullptr, - enable_parallel_reading)) - query_plan = std::move(*plan); + processed_stage, + query_info.query, + local_context); + return; + } + else + LOG_WARNING( + log, + "Parallel replicas with custom key will not be used because cluster defined by 'cluster_for_parallel_replicas' ('{}') has " + "multiple shards", + cluster->getName()); } + + const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() + && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree + && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.current_table_chosen_for_reading_with_parallel_replicas); + + if (auto plan = reader.read( + column_names, + storage_snapshot, + query_info, + local_context, + max_block_size, + num_streams, + nullptr, + enable_parallel_reading)) + query_plan = std::move(*plan); } std::optional StorageMergeTree::totalRows(const Settings &) const @@ -1551,6 +1577,12 @@ bool StorageMergeTree::optimize( { assertNotReadonly(); + if (deduplicate && getInMemoryMetadataPtr()->hasProjections()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "OPTIMIZE DEDUPLICATE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + getStorageID().getTableName()); + if (deduplicate) { if (deduplicate_by_columns.empty()) diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index e0818fafae9..62a2a048642 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -106,12 +107,12 @@ public: String getName() const override { return "StorageMongoDBSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { Poco::MongoDB::Database db(db_name); Poco::MongoDB::Document::Vector documents; - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); size_t num_rows = block.rows(); size_t num_cols = block.columns(); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 2a8a7bd2ee7..da391909dff 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -151,9 +151,9 @@ public: String getName() const override { return "StorageMySQLSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index cdfeab62b58..b5a388e8159 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -227,9 +227,9 @@ public: String getName() const override { return "PostgreSQLSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!inserter) { if (on_conflict.empty()) @@ -613,8 +613,9 @@ void registerStoragePostgreSQL(StorageFactory & factory) auto pool = std::make_shared(configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); return std::make_shared( args.table_id, diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index 1a275320f43..83bb3c606c9 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -147,7 +147,7 @@ class RedisSink : public SinkToStorage public: RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; String getName() const override { return "RedisSink"; } private: @@ -169,10 +169,10 @@ RedisSink::RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadat } } -void RedisSink::consume(Chunk & chunk) +void RedisSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -567,8 +567,7 @@ void StorageRedis::mutate(const MutationCommands & commands, ContextPtr context_ Block block; while (executor.pull(block)) { - Chunk chunk(block.getColumns(), block.rows()); - sink->consume(chunk); + sink->consume(Chunk{block.getColumns(), block.rows()}); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a127384c03c..9fb0f082c6c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5,20 +5,21 @@ #include #include +#include #include #include #include #include +#include #include #include #include #include +#include #include +#include #include #include -#include -#include -#include #include @@ -5272,6 +5273,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() if (shutdown_prepared_called.exchange(true)) return; + LOG_TRACE(log, "Start preparing for shutdown"); + try { auto settings_ptr = getSettings(); @@ -5282,7 +5285,11 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() stopBeingLeader(); if (attach_thread) + { attach_thread->shutdown(); + LOG_TRACE(log, "The attach thread is shutdown"); + } + restarting_thread.shutdown(/* part_of_full_shutdown */true); /// Explicitly set the event, because the restarting thread will not set it again @@ -5295,6 +5302,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() shutdown_deadline.emplace(std::chrono::system_clock::now()); throw; } + + LOG_TRACE(log, "Finished preparing for shutdown"); } void StorageReplicatedMergeTree::partialShutdown() @@ -5332,6 +5341,8 @@ void StorageReplicatedMergeTree::shutdown(bool) if (shutdown_called.exchange(true)) return; + LOG_TRACE(log, "Shutdown started"); + flushAndPrepareForShutdown(); if (!shutdown_deadline.has_value()) @@ -5374,6 +5385,7 @@ void StorageReplicatedMergeTree::shutdown(bool) /// Wait for all of them std::lock_guard lock(data_parts_exchange_ptr->rwlock); } + LOG_TRACE(log, "Shutdown finished"); } @@ -5460,13 +5472,45 @@ void StorageReplicatedMergeTree::read( /// 2. Do not read parts that have not yet been written to the quorum of the replicas. /// For this you have to synchronously go to ZooKeeper. if (settings.select_sequential_consistency) + { readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); + return; + } /// reading step for parallel replicas with new analyzer is built in Planner, so don't do it here - else if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer) + if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer) + { readParallelReplicasImpl(query_plan, column_names, query_info, local_context, processed_stage); - else - readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); -} + return; + } + + if (local_context->canUseParallelReplicasCustomKey() && !settings.allow_experimental_analyzer + && local_context->getClientInfo().distributed_depth == 0) + { + if (auto cluster = local_context->getClusterForParallelReplicas(); + local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) + { + auto modified_query_info = query_info; + modified_query_info.cluster = std::move(cluster); + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan, + getStorageID(), + std::move(modified_query_info), + getInMemoryMetadataPtr()->getColumns(), + storage_snapshot, + processed_stage, + query_info.query, + local_context); + return; + } + else + LOG_WARNING( + log, + "Parallel replicas with custom key will not be used because cluster defined by 'cluster_for_parallel_replicas' ('{}') has " + "multiple shards", + cluster->getName()); + } + + readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); } void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( QueryPlan & query_plan, @@ -5508,7 +5552,8 @@ void StorageReplicatedMergeTree::readLocalImpl( const size_t num_streams) { const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() - && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower); + && (!local_context->getSettingsRef().allow_experimental_analyzer + || query_info.current_table_chosen_for_reading_with_parallel_replicas); auto plan = reader.read( column_names, storage_snapshot, query_info, @@ -5746,6 +5791,12 @@ bool StorageReplicatedMergeTree::optimize( if (!is_leader) throw Exception(ErrorCodes::NOT_A_LEADER, "OPTIMIZE cannot be done on this replica because it is not a leader"); + if (deduplicate && getInMemoryMetadataPtr()->hasProjections()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "OPTIMIZE DEDUPLICATE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + getStorageID().getTableName()); + if (cleanup) { if (!getSettings()->allow_experimental_replacing_merge_with_cleanup) diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 85417a2f2a4..179e4cee199 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -141,7 +141,7 @@ public: String getName() const override { return "SQLiteSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 0d094c15880..5b7f9fc0ac2 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -44,7 +44,7 @@ public: const String & backup_file_name_, bool persistent_); String getName() const override { return "SetOrJoinSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: @@ -82,9 +82,9 @@ SetOrJoinSink::SetOrJoinSink( { } -void SetOrJoinSink::consume(Chunk & chunk) +void SetOrJoinSink::consume(Chunk chunk) { - Block block = getHeader().cloneWithColumns(chunk.getColumns()); + Block block = getHeader().cloneWithColumns(chunk.detachColumns()); table.insertBlock(block, getContext()); if (persistent) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 9b6d9f041e1..43560a6d95d 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -193,7 +193,7 @@ public: storage.saveFileSizes(lock); size_t initial_data_size = storage.file_checker.getFileSize(storage.data_file_path); - block_out = std::make_unique(*data_out, 0, metadata_snapshot->getSampleBlock(), false, &storage.indices, initial_data_size); + block_out = std::make_unique(*data_out, 0, metadata_snapshot->getSampleBlock(), std::nullopt, false, &storage.indices, initial_data_size); } String getName() const override { return "StripeLogSink"; } @@ -226,9 +226,9 @@ public: } } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - block_out->write(getHeader().cloneWithColumns(chunk.getColumns())); + block_out->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 90e05c44e31..895da028fc2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -565,12 +565,12 @@ StorageURLSink::StorageURLSink( } -void StorageURLSink::consume(Chunk & chunk) +void StorageURLSink::consume(Chunk chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageURLSink::onCancel() diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 1804079e75f..fa7cc6eeeef 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -251,7 +251,7 @@ public: const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onCancel() override; void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index f48a8c67971..fbc99ab865e 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -328,7 +328,7 @@ void ReadFromSystemDetachedParts::applyFilters(ActionDAGNodes added_filter_nodes filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &block); if (filter) - VirtualColumnUtils::buildSetsForDAG(filter, context); + VirtualColumnUtils::buildSetsForDAG(*filter, context); } } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 175c0834bcb..f7d1c1b3eb8 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -274,7 +274,7 @@ void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) filter_by_database = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &block); if (filter_by_database) - VirtualColumnUtils::buildSetsForDAG(filter_by_database, context); + VirtualColumnUtils::buildSetsForDAG(*filter_by_database, context); block.insert(ColumnWithTypeAndName({}, std::make_shared(), table_column_name)); block.insert(ColumnWithTypeAndName({}, std::make_shared(), engine_column_name)); @@ -283,7 +283,7 @@ void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) filter_by_other_columns = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &block); if (filter_by_other_columns) - VirtualColumnUtils::buildSetsForDAG(filter_by_other_columns, context); + VirtualColumnUtils::buildSetsForDAG(*filter_by_other_columns, context); } } diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index c9c606de049..cb46cd19517 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -119,7 +119,7 @@ public: ZooKeeperSink(const Block & header, ContextPtr context) : SinkToStorage(header), zookeeper(context->getZooKeeper()) { } String getName() const override { return "ZooKeeperSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t rows = block.rows(); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 778c9e13adb..27c52124e9c 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -54,9 +54,9 @@ namespace DB namespace VirtualColumnUtils { -void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context) +void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context) { - for (const auto & node : dag->getNodes()) + for (const auto & node : dag.getNodes()) { if (node.type == ActionsDAG::ActionType::COLUMN) { @@ -79,7 +79,7 @@ void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context) void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) { - buildSetsForDAG(dag, context); + buildSetsForDAG(*dag, context); auto actions = std::make_shared(dag); Block block_with_filter = block; actions->execute(block_with_filter, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index fbfbdd6c6cc..9045a2f5481 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -26,7 +26,7 @@ void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context); /// Builds sets used by ActionsDAG inplace. -void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context); +void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); /// Recursively checks if all functions used in DAG are deterministic in scope of query. bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e36247103c7..77e6ee9cb24 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -305,7 +304,7 @@ namespace public: explicit AddingAggregatedChunkInfoTransform(Block header) : ISimpleTransform(header, header, false) { } - void transform(Chunk & chunk) override { chunk.getChunkInfos().add(std::make_shared()); } + void transform(Chunk & chunk) override { chunk.setChunkInfo(std::make_shared()); } String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; @@ -690,13 +689,7 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter( - insert, - getContext(), - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, getContext()); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); @@ -1420,7 +1413,7 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) + StorageWindowView & window_view, const Block & block, ContextPtr local_context) { window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) @@ -1435,7 +1428,7 @@ void StorageWindowView::writeIntoWindowView( window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - Pipe pipe(std::make_shared(block)); + Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1480,10 +1473,10 @@ void StorageWindowView::writeIntoWindowView( auto syntax_result = TreeRewriter(local_context).analyze(query, columns); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false); - pipe.addSimpleTransform([&](const Block & header_) + pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header_, std::make_shared(filter_expression), + header, std::make_shared(filter_expression), filter_function->getColumnName(), true); }); } @@ -1538,30 +1531,6 @@ void StorageWindowView::writeIntoWindowView( QueryProcessingStage::WithMergeableState); builder = select_block.buildQueryPipeline(); - - builder.addSimpleTransform([&](const Block & stream_header) - { - // Can't move chunk_infos here, that function could be called several times - return std::make_shared(chunk_infos.clone(), stream_header); - }); - - String window_view_id = window_view.getStorageID().hasUUID() ? toString(window_view.getStorageID().uuid) : window_view.getStorageID().getFullNameNotQuoted(); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(window_view_id, stream_header); - }); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header); - }); - -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Afrer tmp table before squashing", stream_header); - }); -#endif - builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1601,13 +1570,6 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Afrer WatermarkTransform", stream_header); - }); -#endif - auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); @@ -1624,16 +1586,9 @@ void StorageWindowView::writeIntoWindowView( auto convert_actions = std::make_shared( convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); + builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, convert_actions); }); } -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Before out", stream_header); - }); -#endif - builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 14ac65091d3..f79867df424 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index ed378169381..84ba92bba00 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -290,7 +290,7 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, size_t min_block_size_rows = mutable_context->getSettingsRef().min_external_table_block_size_rows; size_t min_block_size_bytes = mutable_context->getSettingsRef().min_external_table_block_size_bytes; - auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); + auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); builder->resize(1); builder->addTransform(std::move(squashing)); diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 9f849052071..adc1074b1fe 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -35,7 +35,6 @@ void registerStorageFuzzJSON(StorageFactory & factory); void registerStorageS3(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); void registerStorageS3Queue(StorageFactory & factory); -void registerStorageAzureQueue(StorageFactory & factory); #if USE_PARQUET void registerStorageDeltaLake(StorageFactory & factory); @@ -45,6 +44,10 @@ void registerStorageIceberg(StorageFactory & factory); #endif #endif +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzureQueue(StorageFactory & factory); +#endif + #if USE_HDFS #if USE_HIVE void registerStorageHive(StorageFactory & factory); diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index 8d94988cd65..508f85df6a3 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -80,8 +80,9 @@ void TableFunctionPostgreSQL::parseArguments(const ASTPtr & ast_function, Contex *configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); } } diff --git a/tests/ci/.mypy.ini b/tests/ci/.mypy.ini index 9bc44025826..f12d27979ce 100644 --- a/tests/ci/.mypy.ini +++ b/tests/ci/.mypy.ini @@ -15,3 +15,4 @@ warn_return_any = True no_implicit_reexport = True strict_equality = True extra_checks = True +ignore_missing_imports = True \ No newline at end of file diff --git a/tests/ci/ci.py b/tests/ci/ci.py index af2f4c0a1fc..fac50d30022 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -15,7 +15,7 @@ import upload_result_helper from build_check import get_release_or_pr from ci_config import CI from ci_metadata import CiMetadata -from ci_utils import GHActions, normalize_string +from ci_utils import GHActions, normalize_string, Shell from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, @@ -53,6 +53,7 @@ from stopwatch import Stopwatch from tee_popen import TeePopen from ci_cache import CiCache from ci_settings import CiSettings +from ci_buddy import CIBuddy from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -262,6 +263,8 @@ def check_missing_images_on_dockerhub( def _pre_action(s3, indata, pr_info): + print("Clear dmesg") + Shell.run("sudo dmesg --clear ||:") CommitStatusData.cleanup() JobReport.cleanup() BuildResult.cleanup() @@ -1118,6 +1121,14 @@ def main() -> int: ### POST action: start elif args.post: + if Shell.check( + "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" + ): + print("WARNING: OOM while job execution") + CIBuddy(dry_run=not pr_info.is_release).post_error( + "Out Of Memory", job_name=_get_ext_check_name(args.job_name) + ) + job_report = JobReport.load() if JobReport.exist() else None if job_report: ch_helper = ClickHouseHelper() diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py new file mode 100644 index 00000000000..ea690bb602c --- /dev/null +++ b/tests/ci/ci_buddy.py @@ -0,0 +1,88 @@ +import json +import os + +import boto3 +import requests +from botocore.exceptions import ClientError + +from pr_info import PRInfo +from ci_utils import Shell + + +class CIBuddy: + _HEADERS = {"Content-Type": "application/json"} + + def __init__(self, dry_run=False): + self.repo = os.getenv("GITHUB_REPOSITORY", "") + self.dry_run = dry_run + res = self._get_webhooks() + self.test_channel = "" + self.dev_ci_channel = "" + if res: + self.test_channel = json.loads(res)["test_channel"] + self.dev_ci_channel = json.loads(res)["ci_channel"] + self.job_name = os.getenv("CHECK_NAME", "unknown") + pr_info = PRInfo() + self.pr_number = pr_info.number + self.head_ref = pr_info.head_ref + self.commit_url = pr_info.commit_html_url + + @staticmethod + def _get_webhooks(): + name = "ci_buddy_web_hooks" + + session = boto3.Session(region_name="us-east-1") # Replace with your region + ssm_client = session.client("ssm") + json_string = None + try: + response = ssm_client.get_parameter( + Name=name, + WithDecryption=True, # Set to True if the parameter is a SecureString + ) + json_string = response["Parameter"]["Value"] + except ClientError as e: + print(f"An error occurred: {e}") + + return json_string + + def post(self, message, dry_run=None): + if dry_run is None: + dry_run = self.dry_run + print(f"Posting slack message, dry_run [{dry_run}]") + if dry_run: + url = self.test_channel + else: + url = self.dev_ci_channel + data = {"text": message} + try: + requests.post(url, headers=self._HEADERS, data=json.dumps(data), timeout=10) + except Exception as e: + print(f"ERROR: Failed to post message, ex {e}") + + def post_error(self, error_description, job_name="", with_instance_info=True): + instance_id, instance_type = "unknown", "unknown" + if with_instance_info: + instance_id = Shell.run("ec2metadata --instance-id") or instance_id + instance_type = Shell.run("ec2metadata --instance-type") or instance_type + if not job_name: + job_name = os.getenv("CHECK_NAME", "unknown") + line_err = f":red_circle: *Error: {error_description}*\n\n" + line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" + line_job = f" *Job:* `{job_name}`\n" + line_pr_ = f" *PR:* \n" + line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n" + message = line_err + message += line_job + if with_instance_info: + message += line_ghr + if self.pr_number > 0: + message += line_pr_ + else: + message += line_br_ + self.post(message) + + +if __name__ == "__main__": + # test + buddy = CIBuddy(dry_run=True) + buddy.post_error("Out of memory") diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index bef43083a35..8eda6e6b96f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -311,42 +311,42 @@ class CI: random_bucket="parrepl_with_sanitizer", ), JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2 ), JobNames.STATELESS_TEST_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_TSAN], num_batches=5 + required_builds=[BuildNames.PACKAGE_TSAN], num_batches=2 ), JobNames.STATELESS_TEST_MSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_MSAN], num_batches=6 + required_builds=[BuildNames.PACKAGE_MSAN], num_batches=3 ), JobNames.STATELESS_TEST_UBSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_UBSAN], num_batches=2 + required_builds=[BuildNames.PACKAGE_UBSAN], num_batches=1 ), JobNames.STATELESS_TEST_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=5 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 ), JobNames.STATELESS_TEST_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], ), JobNames.STATELESS_TEST_RELEASE_COVERAGE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE], num_batches=6 + required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE], num_batches=5 ), JobNames.STATELESS_TEST_AARCH64: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=3 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=6 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 ), JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4, release_only=True + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2, release_only=True ), JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], - num_batches=5, + num_batches=3, ), JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index e7034d0b104..629f37289a9 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,4 +1,5 @@ import os +import subprocess from contextlib import contextmanager from pathlib import Path from typing import Any, Iterator, List, Union @@ -42,3 +43,43 @@ class GHActions: for line in lines: print(line) print("::endgroup::") + + +class Shell: + @classmethod + def run_strict(cls, command): + subprocess.run( + command + " 2>&1", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True, + ) + + @classmethod + def run(cls, command): + res = "" + result = subprocess.run( + command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + if result.returncode == 0: + res = result.stdout + return res.strip() + + @classmethod + def check(cls, command): + result = subprocess.run( + command + " 2>&1", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + return result.returncode == 0 diff --git a/tests/ci/download_release_packages.py b/tests/ci/download_release_packages.py index 8f3a2190ae8..e8260cf68f1 100755 --- a/tests/ci/download_release_packages.py +++ b/tests/ci/download_release_packages.py @@ -13,26 +13,28 @@ from get_previous_release_tag import ( PACKAGES_DIR = Path("previous_release_package_folder") -def download_packages(release: ReleaseInfo, dest_path: Path = PACKAGES_DIR) -> None: +def download_packages( + release: ReleaseInfo, dest_path: Path = PACKAGES_DIR, debug: bool = False +) -> None: dest_path.mkdir(parents=True, exist_ok=True) logging.info("Will download %s", release) for pkg, url in release.assets.items(): - if not pkg.endswith("_amd64.deb") or "-dbg_" in pkg: + if not pkg.endswith("_amd64.deb") or (not debug and "-dbg_" in pkg): continue pkg_name = dest_path / pkg download_build_with_progress(url, pkg_name) -def download_last_release(dest_path: Path) -> None: +def download_last_release(dest_path: Path, debug: bool = False) -> None: current_release = get_previous_release(None) if current_release is None: raise DownloadException("The current release is not found") - download_packages(current_release, dest_path=dest_path) + download_packages(current_release, dest_path=dest_path, debug=debug) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) release = get_release_by_tag(input()) - download_packages(release) + download_packages(release, debug=True) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index ee459ce35a0..4440d0d332c 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -104,6 +104,8 @@ def get_run_command( return ( f"docker run --volume={builds_path}:/package_folder " + # For dmesg and sysctl + "--privileged " f"{ci_logs_args}" f"--volume={repo_path}/tests:/usr/share/clickhouse-test " f"{volume_with_broken_test}" @@ -253,7 +255,7 @@ def main(): packages_path.mkdir(parents=True, exist_ok=True) if validate_bugfix_check: - download_last_release(packages_path) + download_last_release(packages_path, debug=True) else: download_all_deb_packages(check_name, reports_path, packages_path) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 7c74f52b610..80ac1935d95 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -185,7 +185,7 @@ def main(): build_path.mkdir(parents=True, exist_ok=True) if validate_bugfix_check: - download_last_release(build_path) + download_last_release(build_path, debug=True) else: download_all_deb_packages(check_name, reports_path, build_path) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8486e3a885f..958dde0606f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -34,10 +34,8 @@ import urllib.parse # for crc32 import zlib from argparse import ArgumentParser -from contextlib import closing from datetime import datetime, timedelta from errno import ESRCH -from queue import Full from subprocess import PIPE, Popen from time import sleep, time from typing import Dict, List, Optional, Set, Tuple, Union @@ -69,7 +67,7 @@ TEST_FILE_EXTENSIONS = [".sql", ".sql.j2", ".sh", ".py", ".expect"] VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$" -TEST_MAX_RUN_TIME_IN_SECONDS = 120 +TEST_MAX_RUN_TIME_IN_SECONDS = 180 class SharedEngineReplacer: @@ -360,37 +358,14 @@ def clickhouse_execute_json( return rows -class Terminated(KeyboardInterrupt): - pass - - -def signal_handler(sig, frame): - raise Terminated(f"Terminated with {sig} signal") - - def stop_tests(): - global stop_tests_triggered_lock - global stop_tests_triggered - global restarted_tests - - with stop_tests_triggered_lock: - print("Stopping tests") - if not stop_tests_triggered.is_set(): - stop_tests_triggered.set() - - # materialize multiprocessing.Manager().list() object before - # sending SIGTERM since this object is a proxy, that requires - # communicating with manager thread, but after SIGTERM will be - # send, this thread will die, and you will get - # ConnectionRefusedError error for any access to "restarted_tests" - # variable. - restarted_tests = [*restarted_tests] - - # send signal to all processes in group to avoid hung check triggering - # (to avoid terminating clickhouse-test itself, the signal should be ignored) - signal.signal(signal.SIGTERM, signal.SIG_IGN) - os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) - signal.signal(signal.SIGTERM, signal.SIG_DFL) + # send signal to all processes in group to avoid hung check triggering + # (to avoid terminating clickhouse-test itself, the signal should be ignored) + print("Sending signals") + signal.signal(signal.SIGTERM, signal.SIG_IGN) + os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) + signal.signal(signal.SIGTERM, signal.SIG_DFL) + print("Sending signals DONE") def get_db_engine(args, database_name): @@ -736,9 +711,9 @@ def get_localzone(): class SettingsRandomizer: settings = { - "max_insert_threads": lambda: ( - 0 if random.random() < 0.5 else random.randint(1, 16) - ), + "max_insert_threads": lambda: 32 + if random.random() < 0.03 + else random.randint(1, 3), "group_by_two_level_threshold": threshold_generator(0.2, 0.2, 1, 1000000), "group_by_two_level_threshold_bytes": threshold_generator( 0.2, 0.2, 1, 50000000 @@ -754,7 +729,7 @@ class SettingsRandomizer: "prefer_localhost_replica": lambda: random.randint(0, 1), "max_block_size": lambda: random.randint(8000, 100000), "max_joined_block_size_rows": lambda: random.randint(8000, 100000), - "max_threads": lambda: random.randint(1, 64), + "max_threads": lambda: 64 if random.random() < 0.03 else random.randint(1, 3), "optimize_append_index": lambda: random.randint(0, 1), "optimize_if_chain_to_multiif": lambda: random.randint(0, 1), "optimize_if_transform_strings_to_enum": lambda: random.randint(0, 1), @@ -1244,6 +1219,11 @@ class TestCase: ): return FailureReason.OBJECT_STORAGE + elif "no-batch" in tags and ( + args.run_by_hash_num is not None or args.run_by_hash_total is not None + ): + return FailureReason.SKIP + elif tags: for build_flag in args.build_flags: if "no-" + build_flag in tags: @@ -1474,8 +1454,7 @@ class TestCase: description_full = messages[result.status] description_full += self.print_test_time(result.total_time) if result.reason is not None: - description_full += " - " - description_full += result.reason.value + description_full += f"\nReason: {result.reason.value} " description_full += result.description @@ -1602,10 +1581,11 @@ class TestCase: # pylint:disable-next=consider-using-with; TODO: fix proc = Popen(command, shell=True, env=os.environ, start_new_session=True) - while ( - datetime.now() - start_time - ).total_seconds() < args.timeout and proc.poll() is None: - sleep(0.01) + try: + proc.wait(args.timeout) + except subprocess.TimeoutExpired: + # Whether the test timed out will be decided later + pass debug_log = "" if os.path.exists(self.testcase_args.debug_log_file): @@ -1627,6 +1607,44 @@ class TestCase: # Normalize hostname in stdout file. replace_in_file(self.stdout_file, socket.gethostname(), "localhost") + if os.environ.get("CLICKHOUSE_PORT_TCP"): + replace_in_file( + self.stdout_file, + f"PORT {os.environ['CLICKHOUSE_PORT_TCP']}", + "PORT 9000", + ) + replace_in_file( + self.stdout_file, + f"localhost {os.environ['CLICKHOUSE_PORT_TCP']}", + "localhost 9000", + ) + + if os.environ.get("CLICKHOUSE_PORT_TCP_SECURE"): + replace_in_file( + self.stdout_file, + f"PORT {os.environ['CLICKHOUSE_PORT_TCP_SECURE']}", + "PORT 9440", + ) + replace_in_file( + self.stdout_file, + f"localhost {os.environ['CLICKHOUSE_PORT_TCP_SECURE']}", + "localhost 9440", + ) + + if os.environ.get("CLICKHOUSE_PATH"): + replace_in_file( + self.stdout_file, + os.environ["CLICKHOUSE_PATH"], + "/var/lib/clickhouse", + ) + + if os.environ.get("CLICKHOUSE_PORT_HTTPS"): + replace_in_file( + self.stdout_file, + f"https://localhost:{os.environ['CLICKHOUSE_PORT_HTTPS']}/", + "https://localhost:8443/", + ) + stdout = "" if os.path.exists(self.stdout_file): with open(self.stdout_file, "rb") as stdfd: @@ -2071,15 +2089,25 @@ class TestSuite: stop_time = None exit_code = None server_died = None -stop_tests_triggered_lock = None -stop_tests_triggered = None -queue = None multiprocessing_manager = None restarted_tests = None -def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): - all_tests, num_tests, test_suite = all_tests_with_params +class ServerDied(Exception): + pass + + +class GlobalTimeout(Exception): + pass + + +def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool]): + ( + all_tests, + num_tests, + test_suite, + is_concurrent, + ) = all_tests_with_params global stop_time global exit_code global server_died @@ -2122,49 +2150,43 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): failures_chain = 0 start_time = datetime.now() - is_concurrent = multiprocessing.current_process().name != "MainProcess" - client_options = get_additional_client_options(args) if num_tests > 0: about = "about " if is_concurrent else "" proc_name = multiprocessing.current_process().name - print(f"\nRunning {about}{num_tests} {test_suite.suite} tests ({proc_name}).\n") + print(f"Running {about}{num_tests} {test_suite.suite} tests ({proc_name}).") while True: - if is_concurrent: - case = queue.get(timeout=args.timeout * 1.1) - if not case: - break + if all_tests: + case = all_tests.pop(0) else: - if all_tests: - case = all_tests.pop(0) - else: - break + break if server_died.is_set(): stop_tests() - break + raise ServerDied("Server died") if stop_time and time() > stop_time: print("\nStop tests run because global time limit is exceeded.\n") stop_tests() - break + raise GlobalTimeout("Stop tests run because global time limit is exceeded") test_case = TestCase(test_suite, case, args, is_concurrent) try: description = "" - test_cace_name = removesuffix(test_case.name, ".gen", ".sql") + ": " - if not is_concurrent: + test_case_name = removesuffix(test_case.name, ".gen", ".sql") + ": " + + if is_concurrent or args.run_sequential_tests_in_parallel: + description = f"{test_case_name:72}" + else: sys.stdout.flush() - sys.stdout.write(f"{test_cace_name:72}") + sys.stdout.write(f"{test_case_name:72}") # This flush is needed so you can see the test name of the long # running test before it will finish. But don't do it in parallel # mode, so that the lines don't mix. sys.stdout.flush() - else: - description = f"{test_cace_name:72}" while True: test_result = test_case.run( @@ -2191,8 +2213,9 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): failures_total += 1 failures_chain += 1 if test_result.reason == FailureReason.SERVER_DIED: - server_died.set() stop_tests() + server_died.set() + raise ServerDied("Server died") elif test_result.status == TestStatus.SKIPPED: skipped_total += 1 @@ -2203,7 +2226,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): if failures_chain >= args.max_failures_chain: stop_tests() - break + raise ServerDied("Max failures chain") if failures_total > 0: print( @@ -2400,7 +2423,36 @@ def extract_key(key: str) -> str: )[1] -def do_run_tests(jobs, test_suite: TestSuite, parallel): +def override_envs(*args_, **kwargs): + global args + args.client += " --port 19000" + args.http_port = 18123 + args.https_port = 18443 + + updated_env = { + "CLICKHOUSE_CONFIG": "/etc/clickhouse-server3/config.xml", + "CLICKHOUSE_CONFIG_DIR": "/etc/clickhouse-server3", + "CLICKHOUSE_CONFIG_GREP": "/etc/clickhouse-server3/preprocessed/config.xml", + "CLICKHOUSE_USER_FILES": "/var/lib/clickhouse3/user_files", + "CLICKHOUSE_SCHEMA_FILES": "/var/lib/clickhouse3/format_schemas", + "CLICKHOUSE_PATH": "/var/lib/clickhouse3", + "CLICKHOUSE_PORT_TCP": "19000", + "CLICKHOUSE_PORT_TCP_SECURE": "19440", + "CLICKHOUSE_PORT_TCP_WITH_PROXY": "19010", + "CLICKHOUSE_PORT_HTTP": "18123", + "CLICKHOUSE_PORT_HTTPS": "18443", + "CLICKHOUSE_PORT_INTERSERVER": "19009", + "CLICKHOUSE_PORT_KEEPER": "19181", + "CLICKHOUSE_PORT_PROMTHEUS_PORT": "19988", + "CLICKHOUSE_PORT_MYSQL": "19004", + "CLICKHOUSE_PORT_POSTGRESQL": "19005", + } + os.environ.update(updated_env) + + run_tests_array(*args_, **kwargs) + + +def do_run_tests(jobs, test_suite: TestSuite): if jobs > 1 and len(test_suite.parallel_tests) > 0: print( "Found", @@ -2409,19 +2461,8 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): len(test_suite.sequential_tests), "sequential tests", ) - run_n, run_total = parallel.split("/") - run_n = float(run_n) - run_total = float(run_total) tests_n = len(test_suite.parallel_tests) - run_total = min(run_total, tests_n) - jobs = min(jobs, tests_n) - run_total = max(jobs, run_total) - - batch_size = max(1, len(test_suite.parallel_tests) // jobs) - parallel_tests_array = [] - for _ in range(jobs): - parallel_tests_array.append((None, batch_size, test_suite)) # If we don't do random shuffling then there will be always # nearly the same groups of test suites running concurrently. @@ -2434,33 +2475,60 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): # of failures will be nearly the same for all tests from the group. random.shuffle(test_suite.parallel_tests) + batch_size = max(1, len(test_suite.parallel_tests) // jobs) + parallel_tests_array = [] + for job in range(jobs): + range_ = job * batch_size, job * batch_size + batch_size + batch = test_suite.parallel_tests[range_[0] : range_[1]] + parallel_tests_array.append((batch, batch_size, test_suite, True)) + try: - with closing(multiprocessing.Pool(processes=jobs)) as pool: - pool.map_async(run_tests_array, parallel_tests_array) + with multiprocessing.Pool(processes=jobs + 1) as pool: + future = pool.map_async(run_tests_array, parallel_tests_array) - for suit in test_suite.parallel_tests: - queue.put(suit, timeout=args.timeout * 1.1) + if args.run_sequential_tests_in_parallel: + # Run parallel tests and sequential tests at the same time + # Sequential tests will use different ClickHouse instance + # In this process we can safely override values in `args` and `os.environ` + future_seq = pool.map_async( + override_envs, + [ + ( + test_suite.sequential_tests, + len(test_suite.sequential_tests), + test_suite, + False, + ) + ], + ) + future_seq.wait() - for _ in range(jobs): - queue.put(None, timeout=args.timeout * 1.1) + future.wait() + finally: + pool.terminate() + pool.close() + pool.join() - queue.close() - except Full: - print( - "Couldn't put test to the queue within timeout. Server probably hung." + if not args.run_sequential_tests_in_parallel: + run_tests_array( + ( + test_suite.sequential_tests, + len(test_suite.sequential_tests), + test_suite, + False, + ) ) - print_stacktraces() - queue.close() - - pool.join() - - run_tests_array( - (test_suite.sequential_tests, len(test_suite.sequential_tests), test_suite) - ) return len(test_suite.sequential_tests) + len(test_suite.parallel_tests) else: num_tests = len(test_suite.all_tests) - run_tests_array((test_suite.all_tests, num_tests, test_suite)) + run_tests_array( + ( + test_suite.all_tests, + num_tests, + test_suite, + False, + ) + ) return num_tests @@ -2765,6 +2833,7 @@ def main(args): f"{get_db_engine(args, db_name)}", settings=get_create_database_settings(args, None), ) + break except HTTPError as e: total_time = (datetime.now() - start_time).total_seconds() if not need_retry(args, e.message, e.message, total_time): @@ -2817,7 +2886,7 @@ def main(args): test_suite.cloud_skip_list = cloud_skip_list test_suite.private_skip_list = private_skip_list - total_tests_run += do_run_tests(args.jobs, test_suite, args.parallel) + total_tests_run += do_run_tests(args.jobs, test_suite) if server_died.is_set(): exit_code.value = 1 @@ -3277,6 +3346,15 @@ def parse_args(): help="Replace ordinary MergeTree engine with SharedMergeTree", ) + parser.add_argument( + "--run-sequential-tests-in-parallel", + action="store_true", + default=False, + help="If `true`, tests with the tag `no-parallel` will run on a " + "separate ClickHouse instance in parallel with other tests. " + "This is used in CI to make test jobs run faster.", + ) + return parser.parse_args() @@ -3284,9 +3362,6 @@ if __name__ == "__main__": stop_time = None exit_code = multiprocessing.Value("i", 0) server_died = multiprocessing.Event() - stop_tests_triggered_lock = multiprocessing.Lock() - stop_tests_triggered = multiprocessing.Event() - queue = multiprocessing.Queue(maxsize=1) multiprocessing_manager = multiprocessing.Manager() restarted_tests = multiprocessing_manager.list() @@ -3294,9 +3369,6 @@ if __name__ == "__main__": # infinite tests processes left # (new process group is required to avoid killing some parent processes) os.setpgid(0, 0) - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGHUP, signal_handler) try: args = parse_args() diff --git a/tests/config/install.sh b/tests/config/install.sh index 08ee11a7407..8b58a519bc9 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -57,7 +57,6 @@ ln -sf $SRC_PATH/config.d/forbidden_headers.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/custom_disks_base_path.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/reverse_dns_query_function.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/disable_s3_env_credentials.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_wait_for_shutdown_replicated_tables.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 33dd85aceaf..3c3d1b6cc96 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -48,6 +48,7 @@ "test_system_metrics/test.py::test_readonly_metrics", "test_system_replicated_fetches/test.py::test_system_replicated_fetches", "test_zookeeper_config_load_balancing/test.py::test_round_robin", + "test_zookeeper_config_load_balancing/test.py::test_az", "test_zookeeper_fallback_session/test.py::test_fallback_session", "test_global_overcommit_tracker/test.py::test_global_overcommit", diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 97d5da787cd..56ea5c8846a 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -9,7 +9,9 @@ def started_cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "disks_app_test", main_configs=["config.xml"], with_minio=True + "disks_app_test", + main_configs=["config.xml"], + with_minio=True, ) cluster.start() @@ -47,12 +49,18 @@ def test_disks_app_func_ld(started_cluster): source = cluster.instances["disks_app_test"] out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "list-disks"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list-disks"] ) - disks = out.split("\n") + disks = list( + sorted( + map( + lambda x: x.split(":")[0], filter(lambda x: len(x) > 1, out.split("\n")) + ) + ) + ) - assert disks[0] == "default" and disks[1] == "test1" and disks[2] == "test2" + assert disks[:4] == ["default", "local", "test1", "test2"] def test_disks_app_func_ls(started_cluster): @@ -61,7 +69,15 @@ def test_disks_app_func_ls(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -75,9 +91,8 @@ def test_disks_app_func_ls(started_cluster): "--save-logs", "--disk", "test1", - "list", - ".", - "--recursive", + "--query", + "list . --recursive", ] ) @@ -102,8 +117,8 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test1", - "write", - "path1", + "--query", + "'write path1'", ] ), ] @@ -113,18 +128,21 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "copy", - "--disk-from", - "test1", - "--disk-to", - "test2", - ".", - ".", + "--query", + "copy --recursive --disk-from test1 --disk-to test2 . .", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path1" in out @@ -136,8 +154,8 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test2", - "remove", - "path1", + "--query", + "remove path1", ] ) @@ -148,21 +166,37 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test1", - "remove", - "path1", + "--query", + "remove path1", ] ) # alesapin: Why we need list one more time? # kssenii: it is an assertion that the file is indeed deleted out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path1" not in out out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) assert "path1" not in out @@ -177,14 +211,13 @@ def test_disks_app_func_ln(started_cluster): [ "/usr/bin/clickhouse", "disks", - "link", - "data/default/test_table", - "data/default/z_tester", + "--query", + "link data/default/test_table data/default/z_tester", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "list", "data/default/"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list data/default/"] ) files = out.split("\n") @@ -209,15 +242,23 @@ def test_disks_app_func_rm(started_cluster): "--save-logs", "--disk", "test2", - "write", - "path3", + "--query", + "'write path3'", ] ), ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path3" in out @@ -229,13 +270,21 @@ def test_disks_app_func_rm(started_cluster): "--save-logs", "--disk", "test2", - "remove", - "path3", + "--query", + "remove path3", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path3" not in out @@ -247,7 +296,15 @@ def test_disks_app_func_mv(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -260,14 +317,21 @@ def test_disks_app_func_mv(started_cluster): "disks", "--disk", "test1", - "move", - "store", - "old_store", + "--query", + "move store old_store", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -290,8 +354,8 @@ def test_disks_app_func_read_write(started_cluster): "--save-logs", "--disk", "test1", - "write", - "5.txt", + "--query", + "'write 5.txt'", ] ), ] @@ -304,8 +368,8 @@ def test_disks_app_func_read_write(started_cluster): "--save-logs", "--disk", "test1", - "read", - "5.txt", + "--query", + "read 5.txt", ] ) @@ -319,7 +383,15 @@ def test_remote_disk_list(started_cluster): init_data_s3(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test3", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test3", + "--query", + "list .", + ] ) files = out.split("\n") @@ -333,9 +405,8 @@ def test_remote_disk_list(started_cluster): "--save-logs", "--disk", "test3", - "list", - ".", - "--recursive", + "--query", + "list . --recursive", ] ) diff --git a/tests/integration/test_disks_app_interactive/__init__.py b/tests/integration/test_disks_app_interactive/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_disks_app_interactive/configs/config.xml b/tests/integration/test_disks_app_interactive/configs/config.xml new file mode 100644 index 00000000000..bcbb107f0a2 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/configs/config.xml @@ -0,0 +1,3 @@ + + /var/lib/clickhouse/ + \ No newline at end of file diff --git a/tests/integration/test_disks_app_interactive/test.py b/tests/integration/test_disks_app_interactive/test.py new file mode 100644 index 00000000000..ca4ba5d9065 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/test.py @@ -0,0 +1,331 @@ +from helpers.cluster import ClickHouseCluster + +import pytest + +import pathlib + +import subprocess +import select +import io +from typing import List, Tuple, Dict, Union, Optional + +import os + + +class ClickHouseDisksException(Exception): + pass + + +@pytest.fixture(scope="module") +def started_cluster(): + global cluster + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "disks_app_test", + main_configs=["server_configs/config.xml"], + with_minio=True, + ) + + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +class DisksClient(object): + SEPARATOR = b"\a\a\a\a\n" + local_client: Optional["DisksClient"] = None # static variable + default_disk_root_directory: str = "/var/lib/clickhouse" + + def __init__(self, bin_path: str, config_path: str, working_path: str): + self.bin_path = bin_path + self.working_path = working_path + + self.proc = subprocess.Popen( + [bin_path, "disks", "--test-mode", "--config", config_path], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + self.poller = select.epoll() + self.poller.register(self.proc.stdout) + self.poller.register(self.proc.stderr) + + self.stopped = False + + self._fd_nums = { + self.proc.stdout.fileno(): self.proc.stdout, + self.proc.stderr.fileno(): self.proc.stderr, + } + + def execute_query(self, query: str, timeout: float = 5.0) -> str: + output = io.BytesIO() + + self.proc.stdin.write(query.encode() + b"\n") + self.proc.stdin.flush() + + events = self.poller.poll(timeout) + if not events: + raise TimeoutError(f"Disks client returned no output") + + for fd_num, event in events: + if event & (select.EPOLLIN | select.EPOLLPRI): + file = self._fd_nums[fd_num] + + if file == self.proc.stdout: + while True: + chunk = file.readline() + if chunk.endswith(self.SEPARATOR): + break + + output.write(chunk) + + elif file == self.proc.stderr: + error_line = self.proc.stderr.readline() + print(error_line) + raise ClickHouseDisksException(error_line.strip().decode()) + + else: + raise ValueError(f"Failed to read from pipe. Flag {event}") + + data = output.getvalue().strip().decode() + return data + + def list_disks(self) -> List[Tuple[str, str]]: + output = self.execute_query("list-disks") + return list( + sorted( + map( + lambda x: (x.split(":")[0], ":".join(x.split(":")[1:])), + output.split("\n"), + ) + ) + ) + + def current_disk_with_path(self) -> Tuple[str, str]: + output = self.execute_query("current_disk_with_path") + disk_line = output.split("\n")[0] + path_line = output.split("\n")[1] + assert disk_line.startswith("Disk: ") + assert path_line.startswith("Path: ") + return disk_line[6:], path_line[6:] + + def ls( + self, path: str, recursive: bool = False, show_hidden: bool = False + ) -> Union[List[str], Dict[str, List[str]]]: + recursive_adding = "--recursive " if recursive else "" + show_hidden_adding = "--all " if show_hidden else "" + output = self.execute_query( + f"list {path} {recursive_adding} {show_hidden_adding}" + ) + if recursive: + answer: Dict[str, List[str]] = dict() + blocks = output.split("\n\n") + for block in blocks: + directory = block.split("\n")[0][:-1] + files = block.split("\n")[1:] + answer[directory] = files + return answer + else: + return output.split("\n") + + def switch_disk(self, disk: str, directory: Optional[str] = None): + directory_addition = f"--path {directory} " if directory is not None else "" + self.execute_query(f"switch-disk {disk} {directory_addition}") + + def cd(self, directory: str, disk: Optional[str] = None): + disk_addition = f"--disk {disk} " if disk is not None else "" + self.execute_query(f"cd {directory} {disk_addition}") + + def copy( + self, + path_from, + path_to, + disk_from: Optional[str] = None, + disk_to: Optional[str] = None, + recursive: bool = False, + ): + disk_from_option = f"--disk-from {disk_from} " if disk_from is not None else "" + disk_to_option = f"--disk-to {disk_to} " if disk_to is not None else "" + recursive_tag = "--recursive" if recursive else "" + + self.execute_query( + f"copy {recursive_tag} {path_from} {path_to} {disk_from_option} {disk_to_option}" + ) + + def move(self, path_from: str, path_to: str): + self.execute_query(f"move {path_from} {path_to}") + + def rm(self, path: str, recursive: bool = False): + recursive_tag = "--recursive" if recursive else "" + self.execute_query(f"rm {recursive_tag} {path}") + + def mkdir(self, path: str, recursive: bool = False): + recursive_adding = "--recursive " if recursive else "" + self.execute_query(f"mkdir {path} {recursive_adding}") + + def ln(self, path_from: str, path_to: str): + self.execute_query(f"link {path_from} {path_to}") + + def read(self, path_from: str, path_to: Optional[str] = None): + path_to_adding = f"--path-to {path_to} " if path_to is not None else "" + output = self.execute_query(f"read {path_from} {path_to_adding}") + return output + + def write( + self, path_from: str, path_to: str + ): # Writing from stdin is difficult to test (do not know how to do this in python) + path_from_adding = f"--path-from {path_from}" + self.execute_query(f"write {path_from_adding} {path_to}") + + @staticmethod + def getLocalDisksClient(refresh: bool): + if (DisksClient.local_client is None) or refresh: + binary_file = os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH") + current_working_directory = str(pathlib.Path().resolve()) + config_file = f"{current_working_directory}/test_disks_app_interactive/configs/config.xml" + if not os.path.exists(DisksClient.default_disk_root_directory): + os.mkdir(DisksClient.default_disk_root_directory) + + DisksClient.local_client = DisksClient( + binary_file, config_file, current_working_directory + ) + return DisksClient.local_client + else: + return DisksClient.local_client + + +def test_disks_app_interactive_list_disks(): + client = DisksClient.getLocalDisksClient(True) + expected_disks_with_path = [ + ("default", "/"), + ("local", client.working_path), + ] + assert expected_disks_with_path == client.list_disks() + assert client.current_disk_with_path() == ("default", "/") + client.switch_disk("local") + assert client.current_disk_with_path() == ( + "local", + client.working_path, + ) + + +def test_disks_app_interactive_list_files_local(): + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("local") + excepted_listed_files = sorted(os.listdir("test_disks_app_interactive/")) + listed_files = sorted(client.ls("test_disks_app_interactive/")) + assert excepted_listed_files == listed_files + + +def test_disks_app_interactive_list_directories_default(): + client = DisksClient.getLocalDisksClient(True) + traversed_dir = client.ls(".", recursive=True) + client.mkdir("dir1") + client.mkdir("dir2") + client.mkdir(".dir3") + client.cd("dir1") + client.mkdir("dir11") + client.mkdir(".dir12") + client.mkdir("dir13") + client.cd("../dir2") + client.mkdir("dir21") + client.mkdir("dir22") + client.mkdir(".dir23") + client.cd("../.dir3") + client.mkdir("dir31") + client.mkdir(".dir32") + client.cd("..") + traversed_dir = client.ls(".", recursive=True) + assert traversed_dir == { + ".": ["dir1", "dir2"], + "./dir1": ["dir11", "dir13"], + "./dir2": ["dir21", "dir22"], + "./dir1/dir11": [], + "./dir1/dir13": [], + "./dir2/dir21": [], + "./dir2/dir22": [], + } + traversed_dir = client.ls(".", recursive=True, show_hidden=True) + assert traversed_dir == { + ".": [".dir3", "dir1", "dir2"], + "./dir1": [".dir12", "dir11", "dir13"], + "./dir2": [".dir23", "dir21", "dir22"], + "./.dir3": [".dir32", "dir31"], + "./dir1/dir11": [], + "./dir1/.dir12": [], + "./dir1/dir13": [], + "./dir2/dir21": [], + "./dir2/dir22": [], + "./dir2/.dir23": [], + "./.dir3/dir31": [], + "./.dir3/.dir32": [], + } + client.rm("dir2", recursive=True) + traversed_dir = client.ls(".", recursive=True, show_hidden=True) + assert traversed_dir == { + ".": [".dir3", "dir1"], + "./dir1": [".dir12", "dir11", "dir13"], + "./.dir3": [".dir32", "dir31"], + "./dir1/dir11": [], + "./dir1/.dir12": [], + "./dir1/dir13": [], + "./.dir3/dir31": [], + "./.dir3/.dir32": [], + } + traversed_dir = client.ls(".", recursive=True, show_hidden=False) + assert traversed_dir == { + ".": ["dir1"], + "./dir1": ["dir11", "dir13"], + "./dir1/dir11": [], + "./dir1/dir13": [], + } + client.rm("dir1", recursive=True) + client.rm(".dir3", recursive=True) + assert client.ls(".", recursive=True, show_hidden=False) == {".": []} + + +def test_disks_app_interactive_cp_and_read(): + initial_text = "File content" + with open("a.txt", "w") as file: + file.write(initial_text) + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("default") + client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") + read_text = client.read("a.txt") + assert initial_text == read_text + client.mkdir("dir1") + client.copy("a.txt", "/dir1/b.txt", disk_from="local", disk_to="default") + read_text = client.read("a.txt", path_to="dir1/b.txt") + assert "" == read_text + read_text = client.read("/dir1/b.txt") + assert read_text == initial_text + with open(f"{DisksClient.default_disk_root_directory}/dir1/b.txt", "r") as file: + read_text = file.read() + assert read_text == initial_text + os.remove("a.txt") + client.rm("a.txt") + client.rm("/dir1", recursive=True) + + +def test_disks_app_interactive_test_move_and_write(): + initial_text = "File content" + with open("a.txt", "w") as file: + file.write(initial_text) + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("default") + client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") + files = client.ls(".") + assert files == ["a.txt"] + client.move("a.txt", "b.txt") + files = client.ls(".") + assert files == ["b.txt"] + read_text = client.read("/b.txt") + assert read_text == initial_text + client.write("b.txt", "c.txt") + read_text = client.read("c.txt") + assert read_text == initial_text + os.remove("a.txt") diff --git a/tests/integration/test_force_deduplication/test.py b/tests/integration/test_force_deduplication/test.py index 14c11bc8500..87b2c45bbc5 100644 --- a/tests/integration/test_force_deduplication/test.py +++ b/tests/integration/test_force_deduplication/test.py @@ -29,8 +29,6 @@ def get_counts(): def test_basic(start_cluster): - old_src, old_a, old_b, old_c = 0, 0, 0, 0 - node.query( """ CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/test/tables/test','1') ORDER BY tuple(); @@ -41,15 +39,6 @@ def test_basic(start_cluster): INSERT INTO test values(999); """ ) - - src, a, b, c = get_counts() - assert src == old_src + 1 - assert a == old_a + 2 - assert b == old_b + 2 - assert c == old_c + 2 - old_src, old_a, old_b, old_c = src, a, b, c - - # that issert fails on test_mv_b due to partitions by A with pytest.raises(QueryRuntimeException): node.query( """ @@ -57,51 +46,34 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(10); """ ) - src, a, b, c = get_counts() - assert src == old_src + 10 - assert a == old_a + 10 - assert b == old_b - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - # deduplication only for src table + old_src, old_a, old_b, old_c = get_counts() + # number of rows in test_mv_a and test_mv_c depends on order of inserts into views + assert old_src == 11 + assert old_a in (1, 11) + assert old_b == 1 + assert old_c in (1, 11) + node.query("INSERT INTO test SELECT number FROM numbers(10)") src, a, b, c = get_counts() - assert src == old_src - assert a == old_a + 10 - assert b == old_b + 10 - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - - # deduplication for MV tables does not work, because previous inserts have not written their deduplications tokens to the log due to `deduplicate_blocks_in_dependent_materialized_views = 0`. - node.query( - """ - SET deduplicate_blocks_in_dependent_materialized_views = 1; - INSERT INTO test SELECT number FROM numbers(10); - """ - ) - src, a, b, c = get_counts() - assert src == old_src - assert a == old_a + 10 - assert b == old_b + 10 - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - - # deduplication for all the tables - node.query( - """ - SET deduplicate_blocks_in_dependent_materialized_views = 1; - INSERT INTO test SELECT number FROM numbers(10); - """ - ) - src, a, b, c = get_counts() + # no changes because of deduplication in source table assert src == old_src assert a == old_a assert b == old_b assert c == old_c - old_src, old_a, old_b, old_c = src, a, b, c - # that issert fails on test_mv_b due to partitions by A, it is an uniq data which is not deduplicated + node.query( + """ + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(10); + """ + ) + src, a, b, c = get_counts() + assert src == 11 + assert a == old_a + 10 # first insert could be succesfull with disabled dedup + assert b == 11 + assert c == old_c + 10 + with pytest.raises(QueryRuntimeException): node.query( """ @@ -110,23 +82,16 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(100,10); """ ) - src, a, b, c = get_counts() - assert src == old_src + 10 - assert a == old_a + 10 - assert b == old_b - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - # deduplication for all tables, except test_mv_b. For test_mv_b it is an uniq data which is not deduplicated due to exception at previous insert node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; INSERT INTO test SELECT number FROM numbers(100,10); """ ) + src, a, b, c = get_counts() - assert src == old_src - assert a == old_a - assert b == old_b + 10 - assert c == old_c - old_src, old_a, old_b, old_c = src, a, b, c + assert src == 21 + assert a == old_a + 20 + assert b == 21 + assert c == old_c + 20 diff --git a/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml b/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml index 2d7946d1587..43d80ee6f69 100644 --- a/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml +++ b/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml @@ -9,4 +9,21 @@ value1 + + + + + true + + node_with_keeper + 9000 + + + node_with_keeper_2 + 9000 + + + true + + diff --git a/tests/integration/test_named_collections/configs/users.d/users.xml b/tests/integration/test_named_collections/configs/users.d/users.xml index 15da914f666..7d4f0543ff1 100644 --- a/tests/integration/test_named_collections/configs/users.d/users.xml +++ b/tests/integration/test_named_collections/configs/users.d/users.xml @@ -1,4 +1,9 @@ + + + 0 + + diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py index dbc502236c0..32846c79d23 100644 --- a/tests/integration/test_named_collections/test.py +++ b/tests/integration/test_named_collections/test.py @@ -3,6 +3,8 @@ import pytest import os import time from helpers.cluster import ClickHouseCluster +from contextlib import nullcontext as does_not_raise +from helpers.client import QueryRuntimeException SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) NAMED_COLLECTIONS_CONFIG = os.path.join( @@ -761,3 +763,32 @@ def test_keeper_storage(cluster): check_dropped(node1) check_dropped(node2) + + +@pytest.mark.parametrize( + "ignore, expected_raise", + [(True, does_not_raise()), (False, pytest.raises(QueryRuntimeException))], +) +def test_keeper_storage_remove_on_cluster(cluster, ignore, expected_raise): + node = cluster.instances["node_with_keeper"] + + replace_in_users_config( + node, + "ignore_on_cluster_for_replicated_named_collections_queries>.", + f"ignore_on_cluster_for_replicated_named_collections_queries>{int(ignore)}", + ) + node.query("SYSTEM RELOAD CONFIG") + + with expected_raise: + node.query( + "DROP NAMED COLLECTION IF EXISTS test_nc ON CLUSTER `replicated_nc_nodes_cluster`" + ) + node.query( + f"CREATE NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` AS key1=1, key2=2 OVERRIDABLE" + ) + node.query( + f"ALTER NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` SET key2=3" + ) + node.query( + f"DROP NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster`" + ) diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index 07a9e2badff..375fe58d741 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -5,7 +5,10 @@ cluster = ClickHouseCluster(__file__) nodes = [ cluster.add_instance( - f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True + f"n{i}", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, + macros={"replica": f"r{i}"}, ) for i in range(1, 5) ] @@ -20,34 +23,17 @@ def start_cluster(): cluster.shutdown() -def create_tables(cluster): - n1 = nodes[0] - n1.query("DROP TABLE IF EXISTS dist_table") - n1.query(f"DROP TABLE IF EXISTS test_table ON CLUSTER {cluster}") - - n1.query( - f"CREATE TABLE test_table ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" - ) - n1.query( - f""" - CREATE TABLE dist_table AS test_table - Engine=Distributed( - {cluster}, - currentDatabase(), - test_table, - rand() - ) - """ +def insert_data(table_name, row_num, all_nodes=False): + query = ( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers({row_num})" ) - -def insert_data(cluster, row_num): - create_tables(cluster) - n1 = nodes[0] - n1.query( - f"INSERT INTO dist_table SELECT number % 4, number FROM numbers({row_num})" - ) - n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") + if all_nodes: + for n in nodes: + n.query(query) + else: + n1 = nodes[0] + n1.query(query) @pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) @@ -56,12 +42,36 @@ def insert_data(cluster, row_num): "cluster", ["test_multiple_shards_multiple_replicas", "test_single_shard_multiple_replicas"], ) -def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter_type): +def test_parallel_replicas_custom_key_distributed( + start_cluster, cluster, custom_key, filter_type +): for node in nodes: node.rotate_logs() row_num = 1000 - insert_data(cluster, row_num) + + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS dist_table ON CLUSTER {cluster} SYNC") + n1.query(f"DROP TABLE IF EXISTS test_table_for_dist ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_dist ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" + ) + + n1.query( + f""" + CREATE TABLE dist_table AS test_table_for_dist + Engine=Distributed( + {cluster}, + currentDatabase(), + test_table_for_dist, + rand() + ) + """ + ) + + insert_data("dist_table", row_num) + + n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") expected_result = "" for i in range(4): @@ -72,10 +82,10 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter n1.query( "SELECT key, count() FROM dist_table GROUP BY key ORDER BY key", settings={ - "prefer_localhost_replica": 0, "max_parallel_replicas": 4, "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, + "prefer_localhost_replica": 0, }, ) == expected_result @@ -87,3 +97,87 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter node.contains_in_log("Processing query on a replica using custom_key") for node in nodes ) + + +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize( + "cluster", + ["test_single_shard_multiple_replicas"], +) +def test_parallel_replicas_custom_key_mergetree( + start_cluster, cluster, custom_key, filter_type +): + for node in nodes: + node.rotate_logs() + + row_num = 1000 + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS test_table_for_mt ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_mt ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" + ) + + insert_data("test_table_for_mt", row_num, all_nodes=True) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t250\n" + + n1 = nodes[0] + assert ( + n1.query( + "SELECT key, count() FROM test_table_for_mt GROUP BY key ORDER BY key", + settings={ + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "parallel_replicas_for_non_replicated_merge_tree": 1, + "cluster_for_parallel_replicas": cluster, + }, + ) + == expected_result + ) + + +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize( + "cluster", + ["test_single_shard_multiple_replicas"], +) +def test_parallel_replicas_custom_key_replicatedmergetree( + start_cluster, cluster, custom_key, filter_type +): + for node in nodes: + node.rotate_logs() + + row_num = 1000 + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS test_table_for_rmt ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_rmt ON CLUSTER {cluster} (key UInt32, value String) Engine=ReplicatedMergeTree('/clickhouse/tables', '{{replica}}') ORDER BY (key, sipHash64(value))" + ) + + insert_data("test_table_for_rmt", row_num, all_nodes=False) + + for node in nodes: + node.query("SYSTEM SYNC REPLICA test_table_for_rmt LIGHTWEIGHT") + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t250\n" + + n1 = nodes[0] + assert ( + n1.query( + "SELECT key, count() FROM test_table_for_rmt GROUP BY key ORDER BY key", + settings={ + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "cluster_for_parallel_replicas": cluster, + }, + ) + == expected_result + ) diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/test.py b/tests/integration/test_parallel_replicas_custom_key_failover/test.py index 3ba3ce092c3..f24a24f3238 100644 --- a/tests/integration/test_parallel_replicas_custom_key_failover/test.py +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -76,11 +76,11 @@ def test_parallel_replicas_custom_key_failover( f"SELECT key, count() FROM cluster('{cluster_name}', currentDatabase(), test_table) GROUP BY key ORDER BY key", settings={ "log_comment": log_comment, - "prefer_localhost_replica": prefer_localhost_replica, "max_parallel_replicas": 4, "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, "use_hedged_requests": use_hedged_requests, + "prefer_localhost_replica": prefer_localhost_replica, # avoid considering replica delay on connection choice # otherwise connection can be not distributed evenly among available nodes # and so custom key secondary queries (we check it bellow) @@ -100,20 +100,19 @@ def test_parallel_replicas_custom_key_failover( assert query_id != "" query_id = query_id[:-1] - if prefer_localhost_replica == 0: + assert ( + node1.query( + f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + ) + == "subqueries\t4\n" + ) + + # With enabled hedged requests, we can't guarantee exact query distribution among nodes + # In case of a replica being slow in terms of responsiveness, hedged connection can change initial replicas choice + if use_hedged_requests == 0: assert ( node1.query( - f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" ) - == "subqueries\t4\n" + == "n1\t3\nn3\t2\n" ) - - # With enabled hedged requests, we can't guarantee exact query distribution among nodes - # In case of a replica being slow in terms of responsiveness, hedged connection can change initial replicas choice - if use_hedged_requests == 0: - assert ( - node1.query( - f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" - ) - == "n1\t3\nn3\t2\n" - ) diff --git a/tests/integration/test_startup_scripts/__init__.py b/tests/integration/test_startup_scripts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_startup_scripts/configs/config.d/query_log.xml b/tests/integration/test_startup_scripts/configs/config.d/query_log.xml new file mode 100644 index 00000000000..24d66fc674e --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/config.d/query_log.xml @@ -0,0 +1,8 @@ + + + system + query_log
+ toYYYYMM(event_date) + 1000 +
+
diff --git a/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml b/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml new file mode 100644 index 00000000000..e8a711a926a --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml @@ -0,0 +1,17 @@ + + + + CREATE ROLE OR REPLACE testrole + + + GRANT CREATE USER, ALTER USER, DROP USER, SHOW USERS, SHOW CREATE USER ON *.* TO 'testrole' WITH GRANT OPTION; + + + CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog + SELECT 1; + + + SELECT * FROM system.query_log LIMIT 1 + + + diff --git a/tests/integration/test_startup_scripts/configs/users.xml b/tests/integration/test_startup_scripts/configs/users.xml new file mode 100644 index 00000000000..f9917b034b2 --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/users.xml @@ -0,0 +1,41 @@ + + + + + + + + 1 + + + + + + + + + + ::/0 + + + default + + default + + + + + + + + 3600 + + 0 + 0 + 0 + 0 + 0 + + + + diff --git a/tests/integration/test_startup_scripts/test.py b/tests/integration/test_startup_scripts/test.py new file mode 100644 index 00000000000..43a871a6fc5 --- /dev/null +++ b/tests/integration/test_startup_scripts/test.py @@ -0,0 +1,21 @@ +from helpers.cluster import ClickHouseCluster + + +def test_startup_scripts(): + cluster = ClickHouseCluster(__file__) + + node = cluster.add_instance( + "node", + main_configs=[ + "configs/config.d/query_log.xml", + "configs/config.d/startup_scripts.xml", + ], + with_zookeeper=False, + ) + + try: + cluster.start() + assert node.query("SHOW TABLES") == "TestTable\n" + + finally: + cluster.shutdown() diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 4cb71895881..d3dd7cfe52a 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -596,19 +596,116 @@ def test_partition_columns(started_cluster): ) assert result == 1 - # instance.query( - # f""" - # DROP TABLE IF EXISTS {TABLE_NAME}; - # CREATE TABLE {TABLE_NAME} (a Int32, b String, c DateTime) - # ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" - # ) - # assert ( - # int( - # instance.query( - # f"SELECT count() FROM {TABLE_NAME} WHERE c != toDateTime('2000/01/05')" - # ) - # ) - # == num_rows - 1 - # ) - # instance.query(f"SELECT a, b, c, FROM {TABLE_NAME}") - # assert False + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} (a Nullable(Int32), b Nullable(String), c Nullable(Date32), d Nullable(Int32), e Nullable(Bool)) + ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + ) + assert ( + """1 test1 2000-01-01 1 false +2 test2 2000-01-02 2 false +3 test3 2000-01-03 3 false +4 test4 2000-01-04 4 false +5 test5 2000-01-05 5 false +6 test6 2000-01-06 6 false +7 test7 2000-01-07 7 false +8 test8 2000-01-08 8 false +9 test9 2000-01-09 9 false""" + == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() + ) + + assert ( + int( + instance.query( + f"SELECT count() FROM {TABLE_NAME} WHERE c == toDateTime('2000/01/05')" + ) + ) + == 1 + ) + + # Subset of columns should work. + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} (b Nullable(String), c Nullable(Date32), d Nullable(Int32)) + ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + ) + assert ( + """test1 2000-01-01 1 +test2 2000-01-02 2 +test3 2000-01-03 3 +test4 2000-01-04 4 +test5 2000-01-05 5 +test6 2000-01-06 6 +test7 2000-01-07 7 +test8 2000-01-08 8 +test9 2000-01-09 9""" + == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() + ) + + for i in range(num_rows + 1, 2 * num_rows + 1): + data = [ + ( + i, + "test" + str(i), + datetime.strptime(f"2000-01-{i}", "%Y-%m-%d"), + i, + False, + ) + ] + df = spark.createDataFrame(data=data, schema=schema) + df.printSchema() + df.write.mode("append").format("delta").partitionBy(partition_columns).save( + f"/{TABLE_NAME}" + ) + + files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + ok = False + for file in files: + if file.endswith("last_checkpoint"): + ok = True + assert ok + + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + """ + ) + ) + assert result == num_rows * 2 + + assert ( + """1 test1 2000-01-01 1 false +2 test2 2000-01-02 2 false +3 test3 2000-01-03 3 false +4 test4 2000-01-04 4 false +5 test5 2000-01-05 5 false +6 test6 2000-01-06 6 false +7 test7 2000-01-07 7 false +8 test8 2000-01-08 8 false +9 test9 2000-01-09 9 false +10 test10 2000-01-10 10 false +11 test11 2000-01-11 11 false +12 test12 2000-01-12 12 false +13 test13 2000-01-13 13 false +14 test14 2000-01-14 14 false +15 test15 2000-01-15 15 false +16 test16 2000-01-16 16 false +17 test17 2000-01-17 17 false +18 test18 2000-01-18 18 false""" + == instance.query( + f""" +SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') ORDER BY c + """ + ).strip() + ) + assert ( + int( + instance.query( + f"SELECT count() FROM {TABLE_NAME} WHERE c == toDateTime('2000/01/15')" + ) + ) + == 1 + ) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 3240039ee81..f885a3507ac 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -78,13 +78,13 @@ def wait_rabbitmq_to_start(rabbitmq_docker_id, cookie, timeout=180): def kill_rabbitmq(rabbitmq_id): p = subprocess.Popen(("docker", "stop", rabbitmq_id), stdout=subprocess.PIPE) - p.communicate() + p.wait(timeout=60) return p.returncode == 0 def revive_rabbitmq(rabbitmq_id, cookie): p = subprocess.Popen(("docker", "start", rabbitmq_id), stdout=subprocess.PIPE) - p.communicate() + p.wait(timeout=60) wait_rabbitmq_to_start(rabbitmq_id, cookie) diff --git a/tests/performance/replaceRegexp_fallback.xml b/tests/performance/replaceRegexp_fallback.xml new file mode 100644 index 00000000000..509257efeb5 --- /dev/null +++ b/tests/performance/replaceRegexp_fallback.xml @@ -0,0 +1,12 @@ + + + + + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers_mt(5000000) FORMAT Null + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers_mt(5000000) FORMAT Null + + + + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers_mt(500000) FORMAT Null + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers_mt(500000) FORMAT Null + diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 0ca7df8ecd3..07c42d6d039 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-ubsan, no-batch -- no-parallel because we want to run this test when most of the other tests already passed -- If this test fails, see the "Top patterns of log messages" diagnostics in the end of run.log diff --git a/tests/queries/0_stateless/00027_argMinMax.sql b/tests/queries/0_stateless/00027_argMinMax.sql index dbf7c9176d2..57f815add27 100644 --- a/tests/queries/0_stateless/00027_argMinMax.sql +++ b/tests/queries/0_stateless/00027_argMinMax.sql @@ -13,4 +13,4 @@ FROM SELECT arrayJoin([[10, 4, 3], [7, 5, 6], [8, 8, 2]]) AS num, arrayJoin([[1, 2, 4]]) AS id -) +); diff --git a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh index a42fd58190a..d57efaa1f0e 100755 --- a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh +++ b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, shard, no-parallel +# Tags: long, shard CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/00307_format_xml.sql b/tests/queries/0_stateless/00307_format_xml.sql index 7fdca83b69f..29c733bb186 100644 --- a/tests/queries/0_stateless/00307_format_xml.sql +++ b/tests/queries/0_stateless/00307_format_xml.sql @@ -1,2 +1,2 @@ SET output_format_write_statistics = 0; -SELECT 'Hello & world' AS s, 'Hello\n', toDateTime('2001-02-03 04:05:06') AS time, arrayMap(x -> toString(x), range(10)) AS arr, (s, time) AS tpl SETTINGS extremes = 1 FORMAT XML; +SELECT 'Hello & world' AS s, 'Hello\n', toDateTime('2001-02-03 04:05:06') AS time, arrayMap(x -> toString(x), range(10)) AS arr, (s, time) AS tpl SETTINGS extremes = 1, enable_named_columns_in_function_tuple = 0 FORMAT XML; diff --git a/tests/queries/0_stateless/00309_formats.sql b/tests/queries/0_stateless/00309_formats.sql index 87a1ea454d0..b0939c00a10 100644 --- a/tests/queries/0_stateless/00309_formats.sql +++ b/tests/queries/0_stateless/00309_formats.sql @@ -1,4 +1,6 @@ SET output_format_write_statistics = 0; +SET enable_named_columns_in_function_tuple = 0; + SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT RowBinary; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT RowBinaryWithNamesAndTypes; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT TabSeparatedWithNamesAndTypes; diff --git a/tests/queries/0_stateless/00429_long_http_bufferization.sh b/tests/queries/0_stateless/00429_long_http_bufferization.sh index 98dd300e6ab..83a6a4e8043 100755 --- a/tests/queries/0_stateless/00429_long_http_bufferization.sh +++ b/tests/queries/0_stateless/00429_long_http_bufferization.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long set -e diff --git a/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql b/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql index 7c939d060ea..14929045356 100644 --- a/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql +++ b/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql @@ -1,5 +1,8 @@ set allow_deprecated_syntax_for_merge_tree=1; +set max_threads = 1; +set max_insert_threads = 1; + drop table if exists test_ins_arr; create table test_ins_arr (date Date, val Array(UInt64)) engine = MergeTree(date, (date), 8192); insert into test_ins_arr select toDate('2017-10-02'), [number, 42] from system.numbers limit 10000; diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index 9c9281dc7e4..adf6abb7298 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -1,7 +1,7 @@ 2 3 -3 +2 3 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index 51e6a513608..d3c4da86b41 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert isn't deduplicated, because deduplicate_blocks_in_dependent_materialized_views = 0 by default +-- Implicit insert isn't deduplicated SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh index 6a682210489..7a71d17f19b 100755 --- a/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/tests/queries/0_stateless/00600_replace_running_query.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none @@ -7,9 +6,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600" -${CLICKHOUSE_CLIENT} -q "create user u_00600 settings max_execution_time=60, readonly=1" -${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600" +TEST_PREFIX=$RANDOM +${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600${TEST_PREFIX}" +${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1" +${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600${TEST_PREFIX}" function wait_for_query_to_start() { @@ -26,7 +26,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d # Wait for it to be replaced wait -${CLICKHOUSE_CLIENT_BINARY} --user=u_00600 --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' # Trying to run another query with the same query_id @@ -43,4 +43,4 @@ wait_for_query_to_start '42' ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null wait ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44' -${CLICKHOUSE_CLIENT} -q "drop user u_00600" +${CLICKHOUSE_CLIENT} -q "drop user u_00600${TEST_PREFIX}" diff --git a/tests/queries/0_stateless/00623_truncate_all_tables.sql b/tests/queries/0_stateless/00623_truncate_all_tables.sql index 2d5e9d48f59..2626f7ed285 100644 --- a/tests/queries/0_stateless/00623_truncate_all_tables.sql +++ b/tests/queries/0_stateless/00623_truncate_all_tables.sql @@ -1,50 +1,43 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS truncate_test; - -CREATE DATABASE IF NOT EXISTS truncate_test; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_set(id UInt64) ENGINE = Set; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_log(id UInt64) ENGINE = Log; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_memory(id UInt64) ENGINE = Memory; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_tiny_log(id UInt64) ENGINE = TinyLog; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_stripe_log(id UInt64) ENGINE = StripeLog; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_merge_tree(p Date, k UInt64) ENGINE = MergeTree ORDER BY p; +CREATE TABLE IF NOT EXISTS truncate_test_set(id UInt64) ENGINE = Set; +CREATE TABLE IF NOT EXISTS truncate_test_log(id UInt64) ENGINE = Log; +CREATE TABLE IF NOT EXISTS truncate_test_memory(id UInt64) ENGINE = Memory; +CREATE TABLE IF NOT EXISTS truncate_test_tiny_log(id UInt64) ENGINE = TinyLog; +CREATE TABLE IF NOT EXISTS truncate_test_stripe_log(id UInt64) ENGINE = StripeLog; +CREATE TABLE IF NOT EXISTS truncate_test_merge_tree(p Date, k UInt64) ENGINE = MergeTree ORDER BY p; SELECT '======Before Truncate======'; -INSERT INTO truncate_test.truncate_test_set VALUES(0); -INSERT INTO truncate_test.truncate_test_log VALUES(1); -INSERT INTO truncate_test.truncate_test_memory VALUES(1); -INSERT INTO truncate_test.truncate_test_tiny_log VALUES(1); -INSERT INTO truncate_test.truncate_test_stripe_log VALUES(1); -INSERT INTO truncate_test.truncate_test_merge_tree VALUES('2000-01-01', 1); -SELECT * FROM system.numbers WHERE number NOT IN truncate_test.truncate_test_set LIMIT 1; -SELECT * FROM truncate_test.truncate_test_log; -SELECT * FROM truncate_test.truncate_test_memory; -SELECT * FROM truncate_test.truncate_test_tiny_log; -SELECT * FROM truncate_test.truncate_test_stripe_log; -SELECT * FROM truncate_test.truncate_test_merge_tree; +INSERT INTO truncate_test_set VALUES(0); +INSERT INTO truncate_test_log VALUES(1); +INSERT INTO truncate_test_memory VALUES(1); +INSERT INTO truncate_test_tiny_log VALUES(1); +INSERT INTO truncate_test_stripe_log VALUES(1); +INSERT INTO truncate_test_merge_tree VALUES('2000-01-01', 1); +SELECT * FROM system.numbers WHERE number NOT IN truncate_test_set LIMIT 1; +SELECT * FROM truncate_test_log; +SELECT * FROM truncate_test_memory; +SELECT * FROM truncate_test_tiny_log; +SELECT * FROM truncate_test_stripe_log; +SELECT * FROM truncate_test_merge_tree; SELECT '======After Truncate And Empty======'; -TRUNCATE ALL TABLES FROM IF EXISTS truncate_test; -SELECT * FROM system.numbers WHERE number NOT IN truncate_test.truncate_test_set LIMIT 1; -SELECT * FROM truncate_test.truncate_test_log; -SELECT * FROM truncate_test.truncate_test_memory; -SELECT * FROM truncate_test.truncate_test_tiny_log; -SELECT * FROM truncate_test.truncate_test_stripe_log; -SELECT * FROM truncate_test.truncate_test_merge_tree; +TRUNCATE ALL TABLES FROM IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +SELECT * FROM system.numbers WHERE number NOT IN truncate_test_set LIMIT 1; +SELECT * FROM truncate_test_log; +SELECT * FROM truncate_test_memory; +SELECT * FROM truncate_test_tiny_log; +SELECT * FROM truncate_test_stripe_log; +SELECT * FROM truncate_test_merge_tree; SELECT '======After Truncate And Insert Data======'; -INSERT INTO truncate_test.truncate_test_set VALUES(0); -INSERT INTO truncate_test.truncate_test_log VALUES(1); -INSERT INTO truncate_test.truncate_test_memory VALUES(1); -INSERT INTO truncate_test.truncate_test_tiny_log VALUES(1); -INSERT INTO truncate_test.truncate_test_stripe_log VALUES(1); -INSERT INTO truncate_test.truncate_test_merge_tree VALUES('2000-01-01', 1); -SELECT * FROM system.numbers WHERE number NOT IN truncate_test.truncate_test_set LIMIT 1; -SELECT * FROM truncate_test.truncate_test_log; -SELECT * FROM truncate_test.truncate_test_memory; -SELECT * FROM truncate_test.truncate_test_tiny_log; -SELECT * FROM truncate_test.truncate_test_stripe_log; -SELECT * FROM truncate_test.truncate_test_merge_tree; - -DROP DATABASE IF EXISTS truncate_test; +INSERT INTO truncate_test_set VALUES(0); +INSERT INTO truncate_test_log VALUES(1); +INSERT INTO truncate_test_memory VALUES(1); +INSERT INTO truncate_test_tiny_log VALUES(1); +INSERT INTO truncate_test_stripe_log VALUES(1); +INSERT INTO truncate_test_merge_tree VALUES('2000-01-01', 1); +SELECT * FROM system.numbers WHERE number NOT IN truncate_test_set LIMIT 1; +SELECT * FROM truncate_test_log; +SELECT * FROM truncate_test_memory; +SELECT * FROM truncate_test_tiny_log; +SELECT * FROM truncate_test_stripe_log; +SELECT * FROM truncate_test_merge_tree; diff --git a/tests/queries/0_stateless/00623_truncate_table.sql b/tests/queries/0_stateless/00623_truncate_table.sql index 4a67e49acda..e35803db1d9 100644 --- a/tests/queries/0_stateless/00623_truncate_table.sql +++ b/tests/queries/0_stateless/00623_truncate_table.sql @@ -1,6 +1,5 @@ set allow_deprecated_syntax_for_merge_tree=1; -DROP DATABASE IF EXISTS truncate_test; DROP TABLE IF EXISTS truncate_test_log; DROP TABLE IF EXISTS truncate_test_memory; DROP TABLE IF EXISTS truncate_test_tiny_log; @@ -9,7 +8,6 @@ DROP TABLE IF EXISTS truncate_test_merge_tree; DROP TABLE IF EXISTS truncate_test_materialized_view; DROP TABLE IF EXISTS truncate_test_materialized_depend; -CREATE DATABASE truncate_test; CREATE TABLE truncate_test_set(id UInt64) ENGINE = Set; CREATE TABLE truncate_test_log(id UInt64) ENGINE = Log; CREATE TABLE truncate_test_memory(id UInt64) ENGINE = Memory; @@ -75,4 +73,3 @@ DROP TABLE IF EXISTS truncate_test_stripe_log; DROP TABLE IF EXISTS truncate_test_merge_tree; DROP TABLE IF EXISTS truncate_test_materialized_view; DROP TABLE IF EXISTS truncate_test_materialized_depend; -DROP DATABASE IF EXISTS truncate_test; diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 8f7d19028b0..1fb219108da 100755 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -36,8 +36,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE c" echo ${CLICKHOUSE_CLIENT} --query "CREATE TABLE root (d UInt64) ENGINE = Null" ${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW d (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/d', '1') ORDER BY d AS SELECT * FROM root" -${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; -${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; ${CLICKHOUSE_CLIENT} --query "SELECT * FROM d"; ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE d" diff --git a/tests/queries/0_stateless/00719_parallel_ddl_db.sh b/tests/queries/0_stateless/00719_parallel_ddl_db.sh index b7dea25c182..ceba24df7e4 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_db.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_db.sh @@ -1,13 +1,12 @@ #!/usr/bin/env bash -# Tags: no-parallel - set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" +DB_SUFFIX=$RANDOM +${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" function query() { @@ -16,8 +15,8 @@ function query() while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 50 ]; do it=$((it+1)) - ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE IF NOT EXISTS parallel_ddl" - ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" + ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE IF NOT EXISTS parallel_ddl_${DB_SUFFIX}" + ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" done } @@ -27,4 +26,4 @@ done wait -${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" +${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" diff --git a/tests/queries/0_stateless/00763_lock_buffer_long.sh b/tests/queries/0_stateless/00763_lock_buffer_long.sh index 2006d43cdd2..444a66767aa 100755 --- a/tests/queries/0_stateless/00763_lock_buffer_long.sh +++ b/tests/queries/0_stateless/00763_lock_buffer_long.sh @@ -21,7 +21,7 @@ function thread1() function thread2() { - seq 1 1000 | sed -r -e 's/.+/SELECT count() FROM buffer_00763_2;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218|^Code: 473' | grep -v '(query: ' + seq 1 500 | sed -r -e 's/.+/SELECT count() FROM buffer_00763_2;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218|^Code: 473' | grep -v '(query: ' } thread1 & diff --git a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh index 71acc11b971..0ed9593c689 100755 --- a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh +++ b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh @@ -50,7 +50,7 @@ export -f thread2; export -f thread3; export -f thread4; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c thread1 2> /dev/null & timeout $TIMEOUT bash -c thread2 2> /dev/null & diff --git a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh index 238cdcea547..ae728c8d10d 100755 --- a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh +++ b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock, no-parallel, no-debug +# Tags: deadlock, no-debug # NOTE: database = $CLICKHOUSE_DATABASE is unwanted @@ -49,7 +49,7 @@ function thread_select() export -f thread_drop_create export -f thread_select -TIMEOUT=60 +TIMEOUT=30 thread_drop_create $TIMEOUT & thread_select $TIMEOUT & diff --git a/tests/queries/0_stateless/00910_buffer_prewhere.sql b/tests/queries/0_stateless/00910_buffer_prewhere.sql index deda0db85fb..e6b1cc424ad 100644 --- a/tests/queries/0_stateless/00910_buffer_prewhere.sql +++ b/tests/queries/0_stateless/00910_buffer_prewhere.sql @@ -1,9 +1,4 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS test_buffer; -CREATE DATABASE test_buffer; -CREATE TABLE test_buffer.mt (uid UInt64, ts DateTime, val Float64) ENGINE = MergeTree PARTITION BY toDate(ts) ORDER BY (uid, ts); -CREATE TABLE test_buffer.buf as test_buffer.mt ENGINE = Buffer(test_buffer, mt, 2, 10, 60, 10000, 100000, 1000000, 10000000); -INSERT INTO test_buffer.buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25); -SELECT count() from test_buffer.buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00'); -DROP DATABASE test_buffer; +CREATE TABLE mt (uid UInt64, ts DateTime, val Float64) ENGINE = MergeTree PARTITION BY toDate(ts) ORDER BY (uid, ts); +CREATE TABLE buf as mt ENGINE = Buffer({CLICKHOUSE_DATABASE:Identifier}, mt, 2, 10, 60, 10000, 100000, 1000000, 10000000); +INSERT INTO buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25); +SELECT count() from buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00'); diff --git a/tests/queries/0_stateless/00938_template_input_format.sh b/tests/queries/0_stateless/00938_template_input_format.sh index be75edcdb61..016e662ea3b 100755 --- a/tests/queries/0_stateless/00938_template_input_format.sh +++ b/tests/queries/0_stateless/00938_template_input_format.sh @@ -1,12 +1,13 @@ #!/usr/bin/env bash -# Tags: no-parallel - # shellcheck disable=SC2016,SC2028 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +CURDIR=$CURDIR/${CLICKHOUSE_DATABASE} +mkdir -p $CURDIR + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template1"; $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template2"; $CLICKHOUSE_CLIENT --query="CREATE TABLE template1 (s1 String, s2 String, s3 String, s4 String, n UInt64, d Date) ENGINE = Memory"; diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index a05515cf756..407e124f137 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; diff --git a/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh index 0a6888a5c69..3046fcbcd73 100755 --- a/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh +++ b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -89,7 +89,7 @@ ${CLICKHOUSE_CLIENT} -n -q " " -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c recreate_lazy_func1 2> /dev/null & timeout $TIMEOUT bash -c recreate_lazy_func2 2> /dev/null & diff --git a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh index 06a460f3600..0d57bb25543 100755 --- a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest +# Tags: zookeeper, no-fasttest set -e @@ -61,7 +61,7 @@ export -f thread3; export -f thread4; export -f thread5; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c thread1 2> /dev/null & timeout $TIMEOUT bash -c thread2 2> /dev/null & diff --git a/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql b/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql index 9040d7b3231..d0841124706 100644 --- a/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql +++ b/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql @@ -1,35 +1,25 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01036; -CREATE DATABASE dict_db_01036; - -CREATE TABLE dict_db_01036.dict_data (key UInt64, val UInt64) Engine=Memory(); -CREATE DICTIONARY dict_db_01036.dict +CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict ( key UInt64 DEFAULT 0, val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01036')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0)); -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; SELECT 'SYSTEM RELOAD DICTIONARY'; -SYSTEM RELOAD DICTIONARY dict_db_01036.dict; -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0)); -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; +SYSTEM RELOAD DICTIONARY dict; +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; SELECT 'CREATE DATABASE'; DROP DATABASE IF EXISTS empty_db_01036; -CREATE DATABASE empty_db_01036; -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; - -DROP DICTIONARY dict_db_01036.dict; -DROP TABLE dict_db_01036.dict_data; -DROP DATABASE dict_db_01036; -DROP DATABASE empty_db_01036; +CREATE DATABASE IF NOT EXISTS empty_db_01036; +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans index 6e31edbdd40..0a3f4123eb8 100644 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans @@ -1,1000 +1,1000 @@ -dictGet test_01037.dict_array (29.5699,2.50068) 101 -dictGet test_01037.dict_array (29.5796,1.55456) 101 -dictGet test_01037.dict_array (29.5796,2.36864) 101 -dictGet test_01037.dict_array (29.5844,1.59626) 101 -dictGet test_01037.dict_array (29.5886,4.03321) 101 -dictGet test_01037.dict_array (29.5914,3.02628) 101 -dictGet test_01037.dict_array (29.5926,-0.0965169) 101 -dictGet test_01037.dict_array (29.5968,2.37773) 101 -dictGet test_01037.dict_array (29.5984,0.755853) 101 -dictGet test_01037.dict_array (29.6066,3.47173) 101 -dictGet test_01037.dict_array (29.6085,-1.26007) 101 -dictGet test_01037.dict_array (29.6131,0.246565) 101 -dictGet test_01037.dict_array (29.6157,-0.266687) 101 -dictGet test_01037.dict_array (29.6164,2.94674) 101 -dictGet test_01037.dict_array (29.6195,-0.591941) 101 -dictGet test_01037.dict_array (29.6231,1.54818) 101 -dictGet test_01037.dict_array (29.6379,0.764114) 101 -dictGet test_01037.dict_array (29.6462,-0.772059) 934570 -dictGet test_01037.dict_array (29.6579,-1.07336) 101 -dictGet test_01037.dict_array (29.6618,-0.271842) 101 -dictGet test_01037.dict_array (29.6629,-0.303602) 101 -dictGet test_01037.dict_array (29.6659,-0.782823) 934570 -dictGet test_01037.dict_array (29.6736,-0.113832) 101 -dictGet test_01037.dict_array (29.6759,3.02905) 101 -dictGet test_01037.dict_array (29.6778,3.71898) 101 -dictGet test_01037.dict_array (29.6796,1.10433) 101 -dictGet test_01037.dict_array (29.6809,2.13677) 101 -dictGet test_01037.dict_array (29.6935,4.11894) 101 -dictGet test_01037.dict_array (29.6991,-1.4458199999999999) 101 -dictGet test_01037.dict_array (29.6997,3.17297) 101 -dictGet test_01037.dict_array (29.7043,3.6145899999999997) 101 -dictGet test_01037.dict_array (29.7065,3.24885) 101 -dictGet test_01037.dict_array (29.7126,0.28108) 101 -dictGet test_01037.dict_array (29.7192,0.174273) 101 -dictGet test_01037.dict_array (29.7217,-0.523481) 934570 -dictGet test_01037.dict_array (29.7271,1.67967) 101 -dictGet test_01037.dict_array (29.7311,4.12444) 101 -dictGet test_01037.dict_array (29.7347,1.88378) 101 -dictGet test_01037.dict_array (29.7358,0.67944) 101 -dictGet test_01037.dict_array (29.7366,-0.2973) 101 -dictGet test_01037.dict_array (29.7446,0.646536) 101 -dictGet test_01037.dict_array (29.7453,-0.567963) 101 -dictGet test_01037.dict_array (29.764,4.04217) 101 -dictGet test_01037.dict_array (29.7655,1.51372) 101 -dictGet test_01037.dict_array (29.7744,1.12435) 101 -dictGet test_01037.dict_array (29.7774,-0.0681196) 101 -dictGet test_01037.dict_array (29.7784,1.54864) 101 -dictGet test_01037.dict_array (29.7785,2.24139) 101 -dictGet test_01037.dict_array (29.7922,0.220808) 101 -dictGet test_01037.dict_array (29.7936,2.37709) 101 -dictGet test_01037.dict_array (29.8008,0.948536) 101 -dictGet test_01037.dict_array (29.8115,0.201227) 101 -dictGet test_01037.dict_array (29.814,0.149601) 101 -dictGet test_01037.dict_array (29.8193,-1.35858) 101 -dictGet test_01037.dict_array (29.8201,0.965518) 101 -dictGet test_01037.dict_array (29.8265,-0.727286) 101 -dictGet test_01037.dict_array (29.8277,-0.531746) 101 -dictGet test_01037.dict_array (29.8289,3.63009) 101 -dictGet test_01037.dict_array (29.8548,0.838047) 101 -dictGet test_01037.dict_array (29.8641,-0.845265) 101 -dictGet test_01037.dict_array (29.8649,0.0562212) 101 -dictGet test_01037.dict_array (29.8701,-1.02045) 101 -dictGet test_01037.dict_array (29.8733,2.76654) 101 -dictGet test_01037.dict_array (29.876,0.555475) 101 -dictGet test_01037.dict_array (29.8794,-0.800108) 101 -dictGet test_01037.dict_array (29.8813,2.7426399999999997) 101 -dictGet test_01037.dict_array (29.897100000000002,2.66193) 101 -dictGet test_01037.dict_array (29.908,4.01339) 101 -dictGet test_01037.dict_array (29.9165,-1.08246) 101 -dictGet test_01037.dict_array (29.9201,-0.420861) 101 -dictGet test_01037.dict_array (29.9217,3.03778) 101 -dictGet test_01037.dict_array (29.9355,0.773833) 101 -dictGet test_01037.dict_array (29.947,3.76517) 101 -dictGet test_01037.dict_array (29.9518,-0.60557) 101 -dictGet test_01037.dict_array (29.9564,-0.600163) 101 -dictGet test_01037.dict_array (29.959600000000002,4.16591) 101 -dictGet test_01037.dict_array (29.9615,-1.33708) 101 -dictGet test_01037.dict_array (29.9699,-0.392375) 101 -dictGet test_01037.dict_array (29.9776,1.04552) 101 -dictGet test_01037.dict_array (29.9784,4.02756) 101 -dictGet test_01037.dict_array (29.9819,4.00597) 101 -dictGet test_01037.dict_array (29.9826,1.2816100000000001) 101 -dictGet test_01037.dict_array (30.0026,2.76257) 101 -dictGet test_01037.dict_array (30.0126,3.68255) 101 -dictGet test_01037.dict_array (30.0131,0.796576) 101 -dictGet test_01037.dict_array (30.018,1.16523) 101 -dictGet test_01037.dict_array (30.0261,-0.210653) 101 -dictGet test_01037.dict_array (30.0472,-1.11007) 101 -dictGet test_01037.dict_array (30.0542,-0.479585) 101 -dictGet test_01037.dict_array (30.0613,1.6278000000000001) 101 -dictGet test_01037.dict_array (30.0617,-0.0551152) 101 -dictGet test_01037.dict_array (30.0637,2.62066) 101 -dictGet test_01037.dict_array (30.0721,1.6424400000000001) 101 -dictGet test_01037.dict_array (30.0769,-0.402636) 101 -dictGet test_01037.dict_array (30.0791,-0.277435) 101 -dictGet test_01037.dict_array (30.0931,0.0327512) 101 -dictGet test_01037.dict_array (30.1059,3.52623) 101 -dictGet test_01037.dict_array (30.1103,0.865466) 101 -dictGet test_01037.dict_array (30.1115,2.95243) 101 -dictGet test_01037.dict_array (30.1144,1.71029) 101 -dictGet test_01037.dict_array (30.1311,-0.864751) 101 -dictGet test_01037.dict_array (30.1336,-0.851386) 101 -dictGet test_01037.dict_array (30.1393,3.89901) 101 -dictGet test_01037.dict_array (30.1456,-0.531898) 101 -dictGet test_01037.dict_array (30.1492,2.07833) 101 -dictGet test_01037.dict_array (30.1575,2.43856) 101 -dictGet test_01037.dict_array (30.1682,1.19771) 101 -dictGet test_01037.dict_array (30.1716,3.9853300000000003) 101 -dictGet test_01037.dict_array (30.1849,2.78374) 101 -dictGet test_01037.dict_array (30.1866,0.65658) 101 -dictGet test_01037.dict_array (30.1885,1.56943) 101 -dictGet test_01037.dict_array (30.1959,-1.38202) 101 -dictGet test_01037.dict_array (30.1999,1.58413) 101 -dictGet test_01037.dict_array (30.2024,0.713081) 101 -dictGet test_01037.dict_array (30.2054,0.620143) 101 -dictGet test_01037.dict_array (30.2091,1.51641) 101 -dictGet test_01037.dict_array (30.2124,-0.331782) 101 -dictGet test_01037.dict_array (30.226,3.03527) 101 -dictGet test_01037.dict_array (30.2261,3.18486) 101 -dictGet test_01037.dict_array (30.2288,2.48407) 101 -dictGet test_01037.dict_array (30.2345,3.7462400000000002) 101 -dictGet test_01037.dict_array (30.2375,0.62046) 101 -dictGet test_01037.dict_array (30.2425,-0.472914) 101 -dictGet test_01037.dict_array (30.247,3.95863) 101 -dictGet test_01037.dict_array (30.2494,-0.305093) 101 -dictGet test_01037.dict_array (30.2499,2.54337) 101 -dictGet test_01037.dict_array (30.2606,2.16644) 101 -dictGet test_01037.dict_array (30.2672,3.94847) 101 -dictGet test_01037.dict_array (30.2709,-0.136264) 101 -dictGet test_01037.dict_array (30.2764,1.18654) 101 -dictGet test_01037.dict_array (30.2765,1.20383) 101 -dictGet test_01037.dict_array (30.2839,1.05762) 101 -dictGet test_01037.dict_array (30.286,0.469327) 101 -dictGet test_01037.dict_array (30.2927,3.1693) 101 -dictGet test_01037.dict_array (30.2935,3.49854) 101 -dictGet test_01037.dict_array (30.307,0.312338) 101 -dictGet test_01037.dict_array (30.3085,1.07791) 101 -dictGet test_01037.dict_array (30.3139,2.77248) 101 -dictGet test_01037.dict_array (30.314,0.822823) 101 -dictGet test_01037.dict_array (30.3227,-0.587351) 101 -dictGet test_01037.dict_array (30.332,1.00174) 101 -dictGet test_01037.dict_array (30.3388,0.844148) 101 -dictGet test_01037.dict_array (30.3485,0.561902) 101 -dictGet test_01037.dict_array (30.3497,0.180362) 101 -dictGet test_01037.dict_array (30.361,4.13016) 101 -dictGet test_01037.dict_array (30.3623,-0.0484027) 101 -dictGet test_01037.dict_array (30.3638,3.9845800000000002) 101 -dictGet test_01037.dict_array (30.3853,3.16051) 101 -dictGet test_01037.dict_array (30.3974,2.6617800000000003) 101 -dictGet test_01037.dict_array (30.4002,-1.15886) 101 -dictGet test_01037.dict_array (30.4008,-0.387015) 101 -dictGet test_01037.dict_array (30.4018,1.86493) 101 -dictGet test_01037.dict_array (30.4239,1.16818) 101 -dictGet test_01037.dict_array (30.4363,3.63938) 101 -dictGet test_01037.dict_array (30.4377,-0.81315) 101 -dictGet test_01037.dict_array (30.4391,3.54703) 101 -dictGet test_01037.dict_array (30.4424,-1.39435) 101 -dictGet test_01037.dict_array (30.4441,2.8463000000000003) 101 -dictGet test_01037.dict_array (30.4517,3.28117) 101 -dictGet test_01037.dict_array (30.4658,2.6928) 101 -dictGet test_01037.dict_array (30.4734,2.66161) 101 -dictGet test_01037.dict_array (30.4799,-1.07578) 101 -dictGet test_01037.dict_array (30.4837,-1.02486) 101 -dictGet test_01037.dict_array (30.485,1.06326) 101 -dictGet test_01037.dict_array (30.495,1.12306) 101 -dictGet test_01037.dict_array (30.501,2.27264) 101 -dictGet test_01037.dict_array (30.5027,1.99382) 101 -dictGet test_01037.dict_array (30.5194,-1.03943) 101 -dictGet test_01037.dict_array (30.5239,1.04328) 101 -dictGet test_01037.dict_array (30.528,3.82041) 101 -dictGet test_01037.dict_array (30.5299,-0.715248) 101 -dictGet test_01037.dict_array (30.5331,1.19603) 101 -dictGet test_01037.dict_array (30.535800000000002,2.71485) 101 -dictGet test_01037.dict_array (30.5405,0.804694) 101 -dictGet test_01037.dict_array (30.542,1.23739) 101 -dictGet test_01037.dict_array (30.5432,4.04189) 101 -dictGet test_01037.dict_array (30.5457,-0.956121) 101 -dictGet test_01037.dict_array (30.5506,3.07443) 101 -dictGet test_01037.dict_array (30.5539,3.87084) 101 -dictGet test_01037.dict_array (30.5578,3.78837) 101 -dictGet test_01037.dict_array (30.5588,0.966135) 101 -dictGet test_01037.dict_array (30.5637,2.5605) 101 -dictGet test_01037.dict_array (30.5647,-1.27328) 101 -dictGet test_01037.dict_array (30.5656,-0.0581332) 101 -dictGet test_01037.dict_array (30.5715,0.65755) 101 -dictGet test_01037.dict_array (30.5727,3.01604) 101 -dictGet test_01037.dict_array (30.5729,-0.976857) 101 -dictGet test_01037.dict_array (30.5751,0.60204) 101 -dictGet test_01037.dict_array (30.5854,3.02473) 101 -dictGet test_01037.dict_array (30.5866,0.174099) 101 -dictGet test_01037.dict_array (30.5947,0.875193) 101 -dictGet test_01037.dict_array (30.5992,-0.403901) 101 -dictGet test_01037.dict_array (30.6002,4.18891) 101 -dictGet test_01037.dict_array (30.6025,0.217712) 101 -dictGet test_01037.dict_array (30.6054,0.927203) 101 -dictGet test_01037.dict_array (30.6075,3.79359) 101 -dictGet test_01037.dict_array (30.6159,3.82773) 101 -dictGet test_01037.dict_array (30.627,3.84039) 101 -dictGet test_01037.dict_array (30.6308,0.77517) 101 -dictGet test_01037.dict_array (30.6338,0.179565) 101 -dictGet test_01037.dict_array (30.6461,1.3293599999999999) 101 -dictGet test_01037.dict_array (30.6674,-0.424547) 101 -dictGet test_01037.dict_array (30.669,1.76539) 101 -dictGet test_01037.dict_array (30.6788,4.01239) 101 -dictGet test_01037.dict_array (30.6864,3.59158) 101 -dictGet test_01037.dict_array (30.7049,-0.875413) 101 -dictGet test_01037.dict_array (30.705,1.3307) 101 -dictGet test_01037.dict_array (30.7063,-0.473192) 101 -dictGet test_01037.dict_array (30.7075,-1.1958199999999999) 101 -dictGet test_01037.dict_array (30.7101,-0.367562) 101 -dictGet test_01037.dict_array (30.7203,2.98725) 101 -dictGet test_01037.dict_array (30.7213,2.2745699999999998) 101 -dictGet test_01037.dict_array (30.7446,-0.334144) 101 -dictGet test_01037.dict_array (30.7468,3.82967) 101 -dictGet test_01037.dict_array (30.747,-0.384779) 101 -dictGet test_01037.dict_array (30.7681,0.904198) 101 -dictGet test_01037.dict_array (30.7757,1.78743) 101 -dictGet test_01037.dict_array (30.8021,-0.479212) 101 -dictGet test_01037.dict_array (30.8079,-1.40869) 101 -dictGet test_01037.dict_array (30.8206,-0.0608489) 101 -dictGet test_01037.dict_array (30.8218,0.43909) 101 -dictGet test_01037.dict_array (30.8239,0.10014) 101 -dictGet test_01037.dict_array (30.8282,4.15409) 101 -dictGet test_01037.dict_array (30.8288,-0.709528) 101 -dictGet test_01037.dict_array (30.8326,0.156011) 101 -dictGet test_01037.dict_array (30.8328,-1.03704) 101 -dictGet test_01037.dict_array (30.839,2.15528) 101 -dictGet test_01037.dict_array (30.8452,0.219377) 101 -dictGet test_01037.dict_array (30.8463,0.0515355) 101 -dictGet test_01037.dict_array (30.8526,2.06614) 101 -dictGet test_01037.dict_array (30.8566,0.517876) 101 -dictGet test_01037.dict_array (30.8588,-1.31738) 101 -dictGet test_01037.dict_array (30.8681,0.44207) 101 -dictGet test_01037.dict_array (30.8914,1.0072) 101 -dictGet test_01037.dict_array (30.897,0.483425) 101 -dictGet test_01037.dict_array (30.905,2.8731999999999998) 101 -dictGet test_01037.dict_array (30.9051,2.21956) 101 -dictGet test_01037.dict_array (30.9115,4.00663) 101 -dictGet test_01037.dict_array (30.9167,-0.834462) 101 -dictGet test_01037.dict_array (30.9252,-1.3289900000000001) 101 -dictGet test_01037.dict_array (30.9314,1.85384) 101 -dictGet test_01037.dict_array (30.9392,2.53236) 101 -dictGet test_01037.dict_array (30.9569,2.82038) 101 -dictGet test_01037.dict_array (30.9598,-0.641011) 101 -dictGet test_01037.dict_array (30.9601,-0.254928) 101 -dictGet test_01037.dict_array (30.9623,-1.3886) 101 -dictGet test_01037.dict_array (30.9707,0.888854) 101 -dictGet test_01037.dict_array (30.9766,2.81957) 101 -dictGet test_01037.dict_array (30.9775,2.69273) 101 -dictGet test_01037.dict_array (30.9821,0.587715) 101 -dictGet test_01037.dict_array (30.9887,4.0233) 101 -dictGet test_01037.dict_array (30.9914,0.259542) 101 -dictGet test_01037.dict_array (30.9986,-1.36832) 101 -dictGet test_01037.dict_array (31.008,0.628999) 101 -dictGet test_01037.dict_array (31.0168,-1.17462) 101 -dictGet test_01037.dict_array (31.0237,3.52547) 101 -dictGet test_01037.dict_array (31.0306,3.78522) 101 -dictGet test_01037.dict_array (31.0308,-0.72453) 101 -dictGet test_01037.dict_array (31.0463,2.41997) 101 -dictGet test_01037.dict_array (31.047,0.624184) 101 -dictGet test_01037.dict_array (31.0569,0.0706393) 5994232 -dictGet test_01037.dict_array (31.0583,1.3244099999999999) 101 -dictGet test_01037.dict_array (31.063,3.23861) 101 -dictGet test_01037.dict_array (31.068,0.695575) 101 -dictGet test_01037.dict_array (31.0687,1.85675) 101 -dictGet test_01037.dict_array (31.0692,0.254793) 101 -dictGet test_01037.dict_array (31.0766,0.828128) 101 -dictGet test_01037.dict_array (31.0833,0.0612782) 5994232 -dictGet test_01037.dict_array (31.0833,2.59748) 101 -dictGet test_01037.dict_array (31.0861,-1.3778299999999999) 101 -dictGet test_01037.dict_array (31.0874,3.07258) 101 -dictGet test_01037.dict_array (31.0882,1.4882) 101 -dictGet test_01037.dict_array (31.0924,3.42242) 101 -dictGet test_01037.dict_array (31.0927,2.67448) 101 -dictGet test_01037.dict_array (31.0936,1.12292) 101 -dictGet test_01037.dict_array (31.0952,-0.336928) 101 -dictGet test_01037.dict_array (31.0978,3.48482) 101 -dictGet test_01037.dict_array (31.1107,3.7513199999999998) 101 -dictGet test_01037.dict_array (31.1156,1.19171) 101 -dictGet test_01037.dict_array (31.1176,0.223509) 5994232 -dictGet test_01037.dict_array (31.1249,0.946838) 101 -dictGet test_01037.dict_array (31.1267,1.48983) 101 -dictGet test_01037.dict_array (31.138,-0.289981) 101 -dictGet test_01037.dict_array (31.1382,3.02904) 101 -dictGet test_01037.dict_array (31.1475,2.6178) 101 -dictGet test_01037.dict_array (31.1491,1.37873) 101 -dictGet test_01037.dict_array (31.1525,3.72105) 101 -dictGet test_01037.dict_array (31.1526,-1.4129800000000001) 101 -dictGet test_01037.dict_array (31.1526,-0.186457) 101 -dictGet test_01037.dict_array (31.1539,2.78789) 101 -dictGet test_01037.dict_array (31.1548,-1.08552) 101 -dictGet test_01037.dict_array (31.1567,-0.0768925) 101 -dictGet test_01037.dict_array (31.1613,1.49617) 101 -dictGet test_01037.dict_array (31.1653,1.03777) 101 -dictGet test_01037.dict_array (31.1662,3.4214700000000002) 101 -dictGet test_01037.dict_array (31.1672,-0.0813169) 101 -dictGet test_01037.dict_array (31.177,0.440843) 101 -dictGet test_01037.dict_array (31.1788,-0.737151) 101 -dictGet test_01037.dict_array (31.1856,-0.144396) 101 -dictGet test_01037.dict_array (31.1959,3.66813) 101 -dictGet test_01037.dict_array (31.1996,-0.353983) 101 -dictGet test_01037.dict_array (31.2019,2.86802) 101 -dictGet test_01037.dict_array (31.2087,2.31245) 101 -dictGet test_01037.dict_array (31.2125,3.2713200000000002) 101 -dictGet test_01037.dict_array (31.2137,-0.108129) 101 -dictGet test_01037.dict_array (31.216,3.9156) 101 -dictGet test_01037.dict_array (31.2201,-0.202141) 101 -dictGet test_01037.dict_array (31.2285,2.09058) 101 -dictGet test_01037.dict_array (31.2502,4.01526) 101 -dictGet test_01037.dict_array (31.2585,3.11524) 101 -dictGet test_01037.dict_array (31.2645,-0.620418) 101 -dictGet test_01037.dict_array (31.2684,2.74277) 101 -dictGet test_01037.dict_array (31.2821,-1.12772) 101 -dictGet test_01037.dict_array (31.2821,2.46769) 101 -dictGet test_01037.dict_array (31.2887,3.91396) 101 -dictGet test_01037.dict_array (31.295,1.49942) 101 -dictGet test_01037.dict_array (31.2997,3.46122) 101 -dictGet test_01037.dict_array (31.3017,3.3263) 101 -dictGet test_01037.dict_array (31.3022,3.16754) 101 -dictGet test_01037.dict_array (31.3048,0.364962) 101 -dictGet test_01037.dict_array (31.305,3.1967) 101 -dictGet test_01037.dict_array (31.3061,1.84303) 101 -dictGet test_01037.dict_array (31.3082,-0.173851) 101 -dictGet test_01037.dict_array (31.3315,3.90932) 101 -dictGet test_01037.dict_array (31.3351,2.80164) 101 -dictGet test_01037.dict_array (31.3388,0.168765) 5994233 -dictGet test_01037.dict_array (31.339,0.25535) 101 -dictGet test_01037.dict_array (31.3423,1.7036799999999999) 101 -dictGet test_01037.dict_array (31.349,0.386456) 101 -dictGet test_01037.dict_array (31.3558,-1.04336) 101 -dictGet test_01037.dict_array (31.3564,0.478876) 101 -dictGet test_01037.dict_array (31.3607,-0.0860507) 5994233 -dictGet test_01037.dict_array (31.3831,3.84469) 101 -dictGet test_01037.dict_array (31.3886,-0.731137) 101 -dictGet test_01037.dict_array (31.4043,-0.348907) 101 -dictGet test_01037.dict_array (31.4081,1.47391) 101 -dictGet test_01037.dict_array (31.4176,-0.583645) 101 -dictGet test_01037.dict_array (31.4177,1.36972) 101 -dictGet test_01037.dict_array (31.4182,0.958303) 101 -dictGet test_01037.dict_array (31.4199,3.1738) 101 -dictGet test_01037.dict_array (31.4221,2.74876) 101 -dictGet test_01037.dict_array (31.4301,-0.122643) 5994233 -dictGet test_01037.dict_array (31.4344,1.00661) 101 -dictGet test_01037.dict_array (31.4375,4.20304) 101 -dictGet test_01037.dict_array (31.4377,0.289608) 101 -dictGet test_01037.dict_array (31.4379,0.54744) 101 -dictGet test_01037.dict_array (31.4459,3.94945) 101 -dictGet test_01037.dict_array (31.4559,-0.345063) 101 -dictGet test_01037.dict_array (31.464,0.726129) 101 -dictGet test_01037.dict_array (31.4662,-0.299019) 5994233 -dictGet test_01037.dict_array (31.4671,1.9605299999999999) 101 -dictGet test_01037.dict_array (31.4673,-0.403676) 101 -dictGet test_01037.dict_array (31.4712,-0.237941) 5994233 -dictGet test_01037.dict_array (31.4816,0.120264) 5994233 -dictGet test_01037.dict_array (31.4875,0.323483) 101 -dictGet test_01037.dict_array (31.490099999999998,-0.338163) 101 -dictGet test_01037.dict_array (31.4932,0.517674) 101 -dictGet test_01037.dict_array (31.5112,1.9689299999999998) 101 -dictGet test_01037.dict_array (31.5122,2.92785) 101 -dictGet test_01037.dict_array (31.5151,0.166429) 101 -dictGet test_01037.dict_array (31.5174,2.94802) 101 -dictGet test_01037.dict_array (31.5182,4.18776) 101 -dictGet test_01037.dict_array (31.5238,1.18793) 101 -dictGet test_01037.dict_array (31.5271,3.07446) 101 -dictGet test_01037.dict_array (31.5393,1.58061) 101 -dictGet test_01037.dict_array (31.5421,3.13711) 101 -dictGet test_01037.dict_array (31.5479,2.39897) 101 -dictGet test_01037.dict_array (31.5519,0.99285) 101 -dictGet test_01037.dict_array (31.5685,3.47987) 101 -dictGet test_01037.dict_array (31.5959,0.437382) 101 -dictGet test_01037.dict_array (31.6003,0.194376) 101 -dictGet test_01037.dict_array (31.6026,2.15457) 101 -dictGet test_01037.dict_array (31.606,2.45365) 101 -dictGet test_01037.dict_array (31.6062,-0.453441) 101 -dictGet test_01037.dict_array (31.6107,1.35247) 101 -dictGet test_01037.dict_array (31.6155,3.85588) 101 -dictGet test_01037.dict_array (31.6222,2.03326) 101 -dictGet test_01037.dict_array (31.6231,-0.123059) 101 -dictGet test_01037.dict_array (31.6244,1.6885599999999998) 101 -dictGet test_01037.dict_array (31.6459,0.669716) 101 -dictGet test_01037.dict_array (31.6563,-0.0644741) 101 -dictGet test_01037.dict_array (31.6618,-0.551121) 101 -dictGet test_01037.dict_array (31.6725,-0.38922) 101 -dictGet test_01037.dict_array (31.6727,4.10336) 101 -dictGet test_01037.dict_array (31.6739,4.1391) 101 -dictGet test_01037.dict_array (31.6897,2.8694699999999997) 101 -dictGet test_01037.dict_array (31.6902,3.98792) 101 -dictGet test_01037.dict_array (31.6945,2.46687) 101 -dictGet test_01037.dict_array (31.6987,-1.3796) 101 -dictGet test_01037.dict_array (31.7012,2.34845) 101 -dictGet test_01037.dict_array (31.7036,0.0228348) 101 -dictGet test_01037.dict_array (31.7046,3.68111) 101 -dictGet test_01037.dict_array (31.7055,2.92556) 101 -dictGet test_01037.dict_array (31.7102,1.04532) 101 -dictGet test_01037.dict_array (31.7149,-0.443302) 101 -dictGet test_01037.dict_array (31.7195,2.99311) 101 -dictGet test_01037.dict_array (31.7274,0.166719) 101 -dictGet test_01037.dict_array (31.7565,-0.565382) 101 -dictGet test_01037.dict_array (31.7615,0.771626) 101 -dictGet test_01037.dict_array (31.7739,1.8970099999999999) 101 -dictGet test_01037.dict_array (31.7848,1.2623199999999999) 101 -dictGet test_01037.dict_array (31.7912,-0.788599) 101 -dictGet test_01037.dict_array (31.8011,2.65853) 101 -dictGet test_01037.dict_array (31.8032,-0.0590108) 101 -dictGet test_01037.dict_array (31.8038,1.9618799999999998) 101 -dictGet test_01037.dict_array (31.8098,-1.46851) 101 -dictGet test_01037.dict_array (31.8131,3.41982) 101 -dictGet test_01037.dict_array (31.8169,3.31059) 101 -dictGet test_01037.dict_array (31.8202,-0.193692) 101 -dictGet test_01037.dict_array (31.8306,1.57586) 101 -dictGet test_01037.dict_array (31.8382,-0.787948) 101 -dictGet test_01037.dict_array (31.8433,2.49692) 101 -dictGet test_01037.dict_array (31.8436,2.41851) 101 -dictGet test_01037.dict_array (31.8563,-1.10787) 101 -dictGet test_01037.dict_array (31.8683,0.996504) 101 -dictGet test_01037.dict_array (31.8693,-0.828142) 101 -dictGet test_01037.dict_array (31.8723,1.08929) 101 -dictGet test_01037.dict_array (31.8737,0.881127) 101 -dictGet test_01037.dict_array (31.8881,-0.58441) 101 -dictGet test_01037.dict_array (31.9011,0.121349) 101 -dictGet test_01037.dict_array (31.9066,2.13045) 101 -dictGet test_01037.dict_array (31.9142,1.03368) 101 -dictGet test_01037.dict_array (31.9155,3.38363) 101 -dictGet test_01037.dict_array (31.9168,1.3166) 101 -dictGet test_01037.dict_array (31.9185,-1.11879) 101 -dictGet test_01037.dict_array (31.9186,-0.647948) 101 -dictGet test_01037.dict_array (31.9311,3.96928) 101 -dictGet test_01037.dict_array (31.9335,1.47048) 101 -dictGet test_01037.dict_array (31.9443,-1.36175) 101 -dictGet test_01037.dict_array (31.9481,2.34231) 101 -dictGet test_01037.dict_array (31.9526,1.36565) 101 -dictGet test_01037.dict_array (31.9629,2.5208399999999997) 101 -dictGet test_01037.dict_array (31.9765,0.975783) 101 -dictGet test_01037.dict_array (31.9923,3.31773) 101 -dictGet test_01037.dict_array (31.9994,0.972816) 101 -dictGet test_01037.dict_array (32.001,3.47425) 101 -dictGet test_01037.dict_array (32.0127,2.13874) 101 -dictGet test_01037.dict_array (32.0244,3.2092) 101 -dictGet test_01037.dict_array (32.029,1.18039) 101 -dictGet test_01037.dict_array (32.0315,0.566073) 101 -dictGet test_01037.dict_array (32.0354,1.0766499999999999) 101 -dictGet test_01037.dict_array (32.0399,-1.11576) 101 -dictGet test_01037.dict_array (32.053,2.16849) 101 -dictGet test_01037.dict_array (32.0542,0.042328) 101 -dictGet test_01037.dict_array (32.0576,2.47001) 101 -dictGet test_01037.dict_array (32.061,3.7498899999999997) 101 -dictGet test_01037.dict_array (32.0623,1.25134) 101 -dictGet test_01037.dict_array (32.0626,1.9611399999999999) 101 -dictGet test_01037.dict_array (32.0666,-0.0904247) 101 -dictGet test_01037.dict_array (32.0681,2.28442) 101 -dictGet test_01037.dict_array (32.0692,1.50869) 101 -dictGet test_01037.dict_array (32.0724,4.03314) 101 -dictGet test_01037.dict_array (32.0729,-0.064324) 101 -dictGet test_01037.dict_array (32.079,0.293758) 101 -dictGet test_01037.dict_array (32.0847,-1.19814) 101 -dictGet test_01037.dict_array (32.0974,-0.91927) 101 -dictGet test_01037.dict_array (32.0979,-0.736979) 101 -dictGet test_01037.dict_array (32.106,-1.33063) 101 -dictGet test_01037.dict_array (32.1189,0.246715) 101 -dictGet test_01037.dict_array (32.1207,4.00883) 101 -dictGet test_01037.dict_array (32.1396,1.12402) 101 -dictGet test_01037.dict_array (32.1413,1.5668) 101 -dictGet test_01037.dict_array (32.143,1.35559) 101 -dictGet test_01037.dict_array (32.1538,1.32881) 101 -dictGet test_01037.dict_array (32.1549,4.06552) 101 -dictGet test_01037.dict_array (32.1555,-0.79275) 101 -dictGet test_01037.dict_array (32.163,1.17733) 101 -dictGet test_01037.dict_array (32.1634,2.94273) 101 -dictGet test_01037.dict_array (32.1644,1.85666) 101 -dictGet test_01037.dict_array (32.1745,0.435458) 101 -dictGet test_01037.dict_array (32.1765,1.65149) 101 -dictGet test_01037.dict_array (32.1893,2.08924) 101 -dictGet test_01037.dict_array (32.2024,0.222191) 101 -dictGet test_01037.dict_array (32.2107,1.34379) 101 -dictGet test_01037.dict_array (32.2109,3.9018699999999997) 101 -dictGet test_01037.dict_array (32.2123,1.85233) 101 -dictGet test_01037.dict_array (32.2144,3.72534) 101 -dictGet test_01037.dict_array (32.2218,2.5386699999999998) 101 -dictGet test_01037.dict_array (32.2279,2.84267) 101 -dictGet test_01037.dict_array (32.2345,3.33295) 101 -dictGet test_01037.dict_array (32.2435,3.85283) 101 -dictGet test_01037.dict_array (32.2527,-0.480608) 101 -dictGet test_01037.dict_array (32.2566,-0.837882) 101 -dictGet test_01037.dict_array (32.2627,2.57708) 101 -dictGet test_01037.dict_array (32.2733,0.244931) 101 -dictGet test_01037.dict_array (32.2761,4.05808) 101 -dictGet test_01037.dict_array (32.2764,3.78472) 101 -dictGet test_01037.dict_array (32.2814,-1.26011) 101 -dictGet test_01037.dict_array (32.2861,3.02427) 101 -dictGet test_01037.dict_array (32.2924,0.928609) 101 -dictGet test_01037.dict_array (32.2963,-0.78543) 101 -dictGet test_01037.dict_array (32.3039,3.21175) 101 -dictGet test_01037.dict_array (32.3107,0.698287) 101 -dictGet test_01037.dict_array (32.3138,0.0595677) 101 -dictGet test_01037.dict_array (32.3339,0.707056) 101 -dictGet test_01037.dict_array (32.3351,0.415474) 101 -dictGet test_01037.dict_array (32.342,-0.681023) 101 -dictGet test_01037.dict_array (32.3463,1.83196) 101 -dictGet test_01037.dict_array (32.3494,2.43799) 101 -dictGet test_01037.dict_array (32.3524,3.47049) 101 -dictGet test_01037.dict_array (32.3531,2.33115) 101 -dictGet test_01037.dict_array (32.3602,0.116106) 101 -dictGet test_01037.dict_array (32.3612,1.1598) 101 -dictGet test_01037.dict_array (32.3689,3.34847) 101 -dictGet test_01037.dict_array (32.3695,0.734055) 101 -dictGet test_01037.dict_array (32.3825,3.85017) 101 -dictGet test_01037.dict_array (32.3835,-1.25491) 101 -dictGet test_01037.dict_array (32.4018,-0.728568) 101 -dictGet test_01037.dict_array (32.4044,2.96727) 101 -dictGet test_01037.dict_array (32.4101,2.9988) 101 -dictGet test_01037.dict_array (32.417,-1.12908) 101 -dictGet test_01037.dict_array (32.4172,4.1952) 101 -dictGet test_01037.dict_array (32.4239,2.49512) 101 -dictGet test_01037.dict_array (32.4258,4.05137) 101 -dictGet test_01037.dict_array (32.4264,-0.427357) 101 -dictGet test_01037.dict_array (32.4274,3.59377) 101 -dictGet test_01037.dict_array (32.4286,-1.24757) 101 -dictGet test_01037.dict_array (32.4294,3.0665) 101 -dictGet test_01037.dict_array (32.4333,-0.353347) 101 -dictGet test_01037.dict_array (32.4391,3.64421) 101 -dictGet test_01037.dict_array (32.4401,3.70635) 101 -dictGet test_01037.dict_array (32.45,1.68918) 101 -dictGet test_01037.dict_array (32.4507,-0.133471) 101 -dictGet test_01037.dict_array (32.4592,0.976458) 101 -dictGet test_01037.dict_array (32.4595,1.89135) 101 -dictGet test_01037.dict_array (32.4604,0.280248) 101 -dictGet test_01037.dict_array (32.4835,0.472731) 101 -dictGet test_01037.dict_array (32.4855,2.01938) 101 -dictGet test_01037.dict_array (32.4872,2.01697) 101 -dictGet test_01037.dict_array (32.4911,0.613106) 101 -dictGet test_01037.dict_array (32.4918,2.17834) 101 -dictGet test_01037.dict_array (32.4947,2.34595) 101 -dictGet test_01037.dict_array (32.5035,2.92234) 101 -dictGet test_01037.dict_array (32.5132,-0.331206) 101 -dictGet test_01037.dict_array (32.5156,-0.412604) 7652581 -dictGet test_01037.dict_array (32.5158,2.9067499999999997) 101 -dictGet test_01037.dict_array (32.5249,2.44519) 101 -dictGet test_01037.dict_array (32.5293,-0.790952) 101 -dictGet test_01037.dict_array (32.5319,3.96854) 101 -dictGet test_01037.dict_array (32.5518,3.6093) 101 -dictGet test_01037.dict_array (32.5541,3.5225400000000002) 101 -dictGet test_01037.dict_array (32.5569,0.816123) 101 -dictGet test_01037.dict_array (32.5646,1.9775) 101 -dictGet test_01037.dict_array (32.5733,3.81271) 101 -dictGet test_01037.dict_array (32.5767,0.948327) 101 -dictGet test_01037.dict_array (32.5971,1.76179) 101 -dictGet test_01037.dict_array (32.6035,-0.716157) 101 -dictGet test_01037.dict_array (32.6087,4.21614) 101 -dictGet test_01037.dict_array (32.6171,0.024481) 101 -dictGet test_01037.dict_array (32.6189,-0.775391) 101 -dictGet test_01037.dict_array (32.6198,2.92081) 101 -dictGet test_01037.dict_array (32.621,-0.970784) 101 -dictGet test_01037.dict_array (32.6266,0.650009) 101 -dictGet test_01037.dict_array (32.6315,2.15144) 101 -dictGet test_01037.dict_array (32.6385,-0.436803) 101 -dictGet test_01037.dict_array (32.6449,-0.191292) 101 -dictGet test_01037.dict_array (32.6535,2.10385) 101 -dictGet test_01037.dict_array (32.6592,3.49973) 101 -dictGet test_01037.dict_array (32.6598,2.5980600000000003) 101 -dictGet test_01037.dict_array (32.6612,2.95681) 101 -dictGet test_01037.dict_array (32.6636,-0.57235) 101 -dictGet test_01037.dict_array (32.669,-0.382702) 101 -dictGet test_01037.dict_array (32.6752,1.30748) 101 -dictGet test_01037.dict_array (32.6811,2.9559800000000003) 101 -dictGet test_01037.dict_array (32.6821,0.57336) 101 -dictGet test_01037.dict_array (32.6828,3.91304) 101 -dictGet test_01037.dict_array (32.6979,3.96868) 101 -dictGet test_01037.dict_array (32.6983,3.15784) 101 -dictGet test_01037.dict_array (32.7122,0.794293) 101 -dictGet test_01037.dict_array (32.7131,-0.847256) 101 -dictGet test_01037.dict_array (32.7219,0.883461) 101 -dictGet test_01037.dict_array (32.7228,1.78808) 101 -dictGet test_01037.dict_array (32.7273,-0.206908) 101 -dictGet test_01037.dict_array (32.7292,0.259331) 101 -dictGet test_01037.dict_array (32.7304,-1.38317) 101 -dictGet test_01037.dict_array (32.7353,1.01601) 101 -dictGet test_01037.dict_array (32.7354,4.17574) 101 -dictGet test_01037.dict_array (32.7357,-0.190194) 101 -dictGet test_01037.dict_array (32.7465,-1.37598) 101 -dictGet test_01037.dict_array (32.7494,-0.275675) 101 -dictGet test_01037.dict_array (32.7514,0.128951) 101 -dictGet test_01037.dict_array (32.753,3.44207) 101 -dictGet test_01037.dict_array (32.7686,2.11713) 101 -dictGet test_01037.dict_array (32.7694,1.47159) 101 -dictGet test_01037.dict_array (32.7768,0.0401042) 101 -dictGet test_01037.dict_array (32.781,-1.34283) 101 -dictGet test_01037.dict_array (32.7814,1.73876) 101 -dictGet test_01037.dict_array (32.7856,-1.06363) 101 -dictGet test_01037.dict_array (32.792699999999996,-1.1255600000000001) 101 -dictGet test_01037.dict_array (32.7941,-0.645447) 101 -dictGet test_01037.dict_array (32.7946,1.48889) 101 -dictGet test_01037.dict_array (32.797,0.791753) 101 -dictGet test_01037.dict_array (32.7982,-0.537798) 101 -dictGet test_01037.dict_array (32.8091,2.3611) 101 -dictGet test_01037.dict_array (32.81,1.7130800000000002) 101 -dictGet test_01037.dict_array (32.8174,-0.288322) 101 -dictGet test_01037.dict_array (32.823,1.6546699999999999) 101 -dictGet test_01037.dict_array (32.8233,1.62108) 101 -dictGet test_01037.dict_array (32.8428,-0.400045) 101 -dictGet test_01037.dict_array (32.8479,2.13598) 101 -dictGet test_01037.dict_array (32.8524,0.199902) 101 -dictGet test_01037.dict_array (32.8543,3.23553) 101 -dictGet test_01037.dict_array (32.8562,1.31371) 101 -dictGet test_01037.dict_array (32.87,1.44256) 101 -dictGet test_01037.dict_array (32.8789,2.38192) 101 -dictGet test_01037.dict_array (32.8812,2.20734) 5999168 -dictGet test_01037.dict_array (32.8815,-0.54427) 101 -dictGet test_01037.dict_array (32.8853,2.4859) 5999168 -dictGet test_01037.dict_array (32.8909,0.513964) 101 -dictGet test_01037.dict_array (32.9035,2.38999) 101 -dictGet test_01037.dict_array (32.9097,2.48131) 5999168 -dictGet test_01037.dict_array (32.928,-0.943269) 101 -dictGet test_01037.dict_array (32.9322,1.13165) 101 -dictGet test_01037.dict_array (32.9348,1.22606) 101 -dictGet test_01037.dict_array (32.9417,3.77998) 101 -dictGet test_01037.dict_array (32.9428,3.11936) 101 -dictGet test_01037.dict_array (32.9482,1.18092) 101 -dictGet test_01037.dict_array (32.9506,0.0609364) 101 -dictGet test_01037.dict_array (32.953,-0.828308) 101 -dictGet test_01037.dict_array (32.9593,3.5209099999999998) 101 -dictGet test_01037.dict_array (32.9617,2.07711) 5999168 -dictGet test_01037.dict_array (32.966,0.693749) 101 -dictGet test_01037.dict_array (32.9668,-0.716432) 101 -dictGet test_01037.dict_array (32.9702,1.98555) 101 -dictGet test_01037.dict_array (32.9782,1.73819) 101 -dictGet test_01037.dict_array (32.9805,3.71151) 101 -dictGet test_01037.dict_array (32.9821,2.97225) 101 -dictGet test_01037.dict_array (32.995,-0.830301) 101 -dictGet test_01037.dict_array (33.0234,0.770848) 101 -dictGet test_01037.dict_array (33.0312,-0.340964) 101 -dictGet test_01037.dict_array (33.0366,-0.756795) 101 -dictGet test_01037.dict_array (33.0438,0.812871) 101 -dictGet test_01037.dict_array (33.0455,1.84843) 101 -dictGet test_01037.dict_array (33.0498,0.0913292) 101 -dictGet test_01037.dict_array (33.0506,1.53739) 101 -dictGet test_01037.dict_array (33.0554,2.4265) 101 -dictGet test_01037.dict_array (33.0741,3.61332) 101 -dictGet test_01037.dict_array (33.0765,-0.179985) 101 -dictGet test_01037.dict_array (33.087,1.46465) 101 -dictGet test_01037.dict_array (33.0906,-0.620383) 101 -dictGet test_01037.dict_array (33.1047,-1.28027) 101 -dictGet test_01037.dict_array (33.1072,1.96303) 101 -dictGet test_01037.dict_array (33.1081,-0.897874) 101 -dictGet test_01037.dict_array (33.1122,1.8950200000000001) 101 -dictGet test_01037.dict_array (33.1237,2.63993) 101 -dictGet test_01037.dict_array (33.1238,0.753963) 101 -dictGet test_01037.dict_array (33.1257,0.495668) 101 -dictGet test_01037.dict_array (33.1258,1.78341) 101 -dictGet test_01037.dict_array (33.127,2.59646) 101 -dictGet test_01037.dict_array (33.1324,-1.23742) 101 -dictGet test_01037.dict_array (33.1359,3.83491) 101 -dictGet test_01037.dict_array (33.1628,-0.379588) 101 -dictGet test_01037.dict_array (33.1679,1.25601) 101 -dictGet test_01037.dict_array (33.1688,-1.35553) 101 -dictGet test_01037.dict_array (33.181,2.10943) 101 -dictGet test_01037.dict_array (33.1871,2.81171) 101 -dictGet test_01037.dict_array (33.1877,0.771297) 101 -dictGet test_01037.dict_array (33.1883,-0.204797) 101 -dictGet test_01037.dict_array (33.1886,3.27998) 101 -dictGet test_01037.dict_array (33.1955,0.708907) 101 -dictGet test_01037.dict_array (33.2044,-0.769275) 101 -dictGet test_01037.dict_array (33.2182,3.36103) 101 -dictGet test_01037.dict_array (33.2192,3.43586) 101 -dictGet test_01037.dict_array (33.2322,-0.916753) 101 -dictGet test_01037.dict_array (33.2359,-0.81321) 101 -dictGet test_01037.dict_array (33.238,0.635072) 101 -dictGet test_01037.dict_array (33.2398,3.02588) 101 -dictGet test_01037.dict_array (33.2469,2.35698) 101 -dictGet test_01037.dict_array (33.247,2.3327) 101 -dictGet test_01037.dict_array (33.2579,2.8027100000000003) 101 -dictGet test_01037.dict_array (33.2607,0.321082) 101 -dictGet test_01037.dict_array (33.2653,0.243336) 101 -dictGet test_01037.dict_array (33.2758,0.831836) 101 -dictGet test_01037.dict_array (33.2771,0.886536) 101 -dictGet test_01037.dict_array (33.2914,1.16026) 101 -dictGet test_01037.dict_array (33.2914,1.38882) 101 -dictGet test_01037.dict_array (33.2982,-1.16604) 101 -dictGet test_01037.dict_array (33.2985,0.842556) 101 -dictGet test_01037.dict_array (33.3005,2.8338900000000002) 101 -dictGet test_01037.dict_array (33.305,0.0969475) 101 -dictGet test_01037.dict_array (33.3072,3.82163) 101 -dictGet test_01037.dict_array (33.312,3.41475) 101 -dictGet test_01037.dict_array (33.3129,2.46048) 101 -dictGet test_01037.dict_array (33.3134,3.46863) 101 -dictGet test_01037.dict_array (33.3203,2.33139) 101 -dictGet test_01037.dict_array (33.324,0.433701) 101 -dictGet test_01037.dict_array (33.3338,2.44705) 101 -dictGet test_01037.dict_array (33.337,4.06475) 101 -dictGet test_01037.dict_array (33.3469,1.08172) 101 -dictGet test_01037.dict_array (33.3538,0.717896) 101 -dictGet test_01037.dict_array (33.3618,1.37899) 101 -dictGet test_01037.dict_array (33.3698,0.547744) 101 -dictGet test_01037.dict_array (33.3705,0.957619) 101 -dictGet test_01037.dict_array (33.3821,3.07258) 101 -dictGet test_01037.dict_array (33.3881,3.0626) 101 -dictGet test_01037.dict_array (33.393,-0.816186) 101 -dictGet test_01037.dict_array (33.3945,0.869508) 101 -dictGet test_01037.dict_array (33.4001,1.24186) 101 -dictGet test_01037.dict_array (33.4008,2.34911) 101 -dictGet test_01037.dict_array (33.4166,-1.2808899999999999) 101 -dictGet test_01037.dict_array (33.4167,3.0655) 101 -dictGet test_01037.dict_array (33.4204,2.81887) 101 -dictGet test_01037.dict_array (33.4211,1.71128) 101 -dictGet test_01037.dict_array (33.4237,2.91761) 101 -dictGet test_01037.dict_array (33.4266,1.5955599999999999) 101 -dictGet test_01037.dict_array (33.4353,-0.391392) 101 -dictGet test_01037.dict_array (33.4362,-0.134658) 101 -dictGet test_01037.dict_array (33.4386,0.15396) 101 -dictGet test_01037.dict_array (33.4421,-0.50712) 101 -dictGet test_01037.dict_array (33.452,0.915829) 101 -dictGet test_01037.dict_array (33.463,-0.0882717) 101 -dictGet test_01037.dict_array (33.464,-1.00949) 101 -dictGet test_01037.dict_array (33.4692,0.954092) 101 -dictGet test_01037.dict_array (33.4716,1.9538799999999998) 101 -dictGet test_01037.dict_array (33.4756,1.85836) 101 -dictGet test_01037.dict_array (33.4859,4.0751) 101 -dictGet test_01037.dict_array (33.4899,3.54193) 101 -dictGet test_01037.dict_array (33.4935,3.49794) 101 -dictGet test_01037.dict_array (33.494,-0.983356) 101 -dictGet test_01037.dict_array (33.4955,-1.28128) 101 -dictGet test_01037.dict_array (33.4965,-0.278687) 101 -dictGet test_01037.dict_array (33.4991,0.647491) 101 -dictGet test_01037.dict_array (33.5076,2.2272) 101 -dictGet test_01037.dict_array (33.5079,-0.498199) 101 -dictGet test_01037.dict_array (33.5157,0.535034) 101 -dictGet test_01037.dict_array (33.5171,2.49677) 101 -dictGet test_01037.dict_array (33.5255,2.4447200000000002) 101 -dictGet test_01037.dict_array (33.526,4.01194) 101 -dictGet test_01037.dict_array (33.5288,0.789434) 101 -dictGet test_01037.dict_array (33.5356,-1.17671) 101 -dictGet test_01037.dict_array (33.5402,1.49152) 101 -dictGet test_01037.dict_array (33.5418,3.45757) 101 -dictGet test_01037.dict_array (33.5428,1.90712) 101 -dictGet test_01037.dict_array (33.5556,-0.55741) 101 -dictGet test_01037.dict_array (33.5564,0.876858) 101 -dictGet test_01037.dict_array (33.5567,-0.10208) 101 -dictGet test_01037.dict_array (33.5645,-0.124824) 101 -dictGet test_01037.dict_array (33.5663,3.4872) 101 -dictGet test_01037.dict_array (33.5716,-0.0107611) 101 -dictGet test_01037.dict_array (33.578,3.55714) 101 -dictGet test_01037.dict_array (33.5826,-0.49076) 101 -dictGet test_01037.dict_array (33.5909,0.773737) 101 -dictGet test_01037.dict_array (33.5958,2.9619999999999997) 5994231 -dictGet test_01037.dict_array (33.6193,-0.919755) 101 -dictGet test_01037.dict_array (33.6313,0.652132) 101 -dictGet test_01037.dict_array (33.632,0.823351) 101 -dictGet test_01037.dict_array (33.66,2.18998) 101 -dictGet test_01037.dict_array (33.6621,0.535395) 101 -dictGet test_01037.dict_array (33.6726,3.19367) 101 -dictGet test_01037.dict_array (33.6912,1.74522) 101 -dictGet test_01037.dict_array (33.705,0.706397) 101 -dictGet test_01037.dict_array (33.7076,0.7622) 101 -dictGet test_01037.dict_array (33.7112,1.70187) 101 -dictGet test_01037.dict_array (33.7246,-1.14837) 101 -dictGet test_01037.dict_array (33.7326,2.62413) 5994231 -dictGet test_01037.dict_array (33.7332,2.82137) 5994231 -dictGet test_01037.dict_array (33.7434,0.394672) 101 -dictGet test_01037.dict_array (33.7443,1.54557) 101 -dictGet test_01037.dict_array (33.7506,1.57317) 101 -dictGet test_01037.dict_array (33.7526,1.8578999999999999) 101 -dictGet test_01037.dict_array (33.766,4.15013) 101 -dictGet test_01037.dict_array (33.7834,2.41789) 101 -dictGet test_01037.dict_array (33.7864,0.230935) 101 -dictGet test_01037.dict_array (33.7965,3.05709) 101 -dictGet test_01037.dict_array (33.7998,3.32881) 101 -dictGet test_01037.dict_array (33.8003,2.97338) 5994231 -dictGet test_01037.dict_array (33.8007,-1.08962) 101 -dictGet test_01037.dict_array (33.8022,-0.139488) 101 -dictGet test_01037.dict_array (33.8065,2.70857) 5994231 -dictGet test_01037.dict_array (33.8169,-0.607788) 101 -dictGet test_01037.dict_array (33.8203,0.108512) 101 -dictGet test_01037.dict_array (33.8231,-1.03449) 101 -dictGet test_01037.dict_array (33.8312,3.49458) 101 -dictGet test_01037.dict_array (33.8342,0.297518) 101 -dictGet test_01037.dict_array (33.8352,0.165872) 101 -dictGet test_01037.dict_array (33.8354,1.87277) 101 -dictGet test_01037.dict_array (33.8371,1.60103) 101 -dictGet test_01037.dict_array (33.8387,1.9968) 101 -dictGet test_01037.dict_array (33.8403,3.5805) 101 -dictGet test_01037.dict_array (33.8414,-0.703067) 101 -dictGet test_01037.dict_array (33.844,-0.179472) 101 -dictGet test_01037.dict_array (33.8468,3.40137) 101 -dictGet test_01037.dict_array (33.8509,4.15334) 101 -dictGet test_01037.dict_array (33.8539,2.38339) 101 -dictGet test_01037.dict_array (33.858,-1.3122500000000001) 101 -dictGet test_01037.dict_array (33.859,3.72626) 101 -dictGet test_01037.dict_array (33.8616,2.24433) 101 -dictGet test_01037.dict_array (33.8621,3.01035) 101 -dictGet test_01037.dict_array (33.8623,1.17559) 101 -dictGet test_01037.dict_array (33.8682,2.706) 5994231 -dictGet test_01037.dict_array (33.8684,0.189231) 101 -dictGet test_01037.dict_array (33.872,1.93574) 101 -dictGet test_01037.dict_array (33.8844,3.80404) 101 -dictGet test_01037.dict_array (33.8888,0.594884) 101 -dictGet test_01037.dict_array (33.8946,2.74161) 101 -dictGet test_01037.dict_array (33.9023,0.6239) 101 -dictGet test_01037.dict_array (33.9057,0.873222) 101 -dictGet test_01037.dict_array (33.9157,-1.26607) 101 -dictGet test_01037.dict_array (33.92,2.06848) 101 -dictGet test_01037.dict_array (33.9298,-0.00526229) 101 -dictGet test_01037.dict_array (33.932,3.07063) 101 -dictGet test_01037.dict_array (33.9322,0.629385) 101 -dictGet test_01037.dict_array (33.9367,-1.41955) 101 -dictGet test_01037.dict_array (33.937,1.42532) 101 -dictGet test_01037.dict_array (33.9375,1.1467100000000001) 101 -dictGet test_01037.dict_array (33.9434,-1.05739) 101 -dictGet test_01037.dict_array (33.9477,3.34809) 101 -dictGet test_01037.dict_array (33.95,2.21715) 101 -dictGet test_01037.dict_array (33.955799999999996,0.305176) 101 -dictGet test_01037.dict_array (33.9686,-0.28273) 101 -dictGet test_01037.dict_array (33.9703,4.1255) 101 -dictGet test_01037.dict_array (33.9707,3.08199) 101 -dictGet test_01037.dict_array (33.9754,1.06203) 101 -dictGet test_01037.dict_array (33.9757,3.72468) 101 -dictGet test_01037.dict_array (33.9775,-0.0440599) 101 -dictGet test_01037.dict_array (33.9777,-0.251484) 101 -dictGet test_01037.dict_array (33.9789,-0.339374) 101 -dictGet test_01037.dict_array (33.9849,2.54515) 5994231 -dictGet test_01037.dict_array (33.9885,-0.318557) 101 -dictGet test_01037.dict_array (33.9977,1.07175) 101 -dictGet test_01037.dict_array (33.9984,-0.700517) 101 -dictGet test_01037.dict_array (34.0149,3.53338) 101 -dictGet test_01037.dict_array (34.0173,3.39155) 101 -dictGet test_01037.dict_array (34.0317,3.9579) 101 -dictGet test_01037.dict_array (34.0369,3.83612) 101 -dictGet test_01037.dict_array (34.043,-0.0887221) 101 -dictGet test_01037.dict_array (34.0487,1.14252) 101 -dictGet test_01037.dict_array (34.052,1.74832) 101 -dictGet test_01037.dict_array (34.0711,-0.898071) 101 -dictGet test_01037.dict_array (34.0747,1.55057) 101 -dictGet test_01037.dict_array (34.0803,3.16763) 101 -dictGet test_01037.dict_array (34.0872,3.75555) 101 -dictGet test_01037.dict_array (34.0965,1.62038) 101 -dictGet test_01037.dict_array (34.0977,-0.412691) 101 -dictGet test_01037.dict_array (34.0986,0.0294206) 101 -dictGet test_01037.dict_array (34.1072,3.15823) 101 -dictGet test_01037.dict_array (34.1092,3.09599) 101 -dictGet test_01037.dict_array (34.1206,1.04637) 5940222 -dictGet test_01037.dict_array (34.1209,3.13826) 101 -dictGet test_01037.dict_array (34.1265,3.95881) 101 -dictGet test_01037.dict_array (34.1286,-0.539319) 101 -dictGet test_01037.dict_array (34.1358,3.67451) 101 -dictGet test_01037.dict_array (34.1428,0.136115) 101 -dictGet test_01037.dict_array (34.157,1.73522) 101 -dictGet test_01037.dict_array (34.1581,1.48001) 101 -dictGet test_01037.dict_array (34.1682,3.42373) 101 -dictGet test_01037.dict_array (34.1683,-1.26511) 101 -dictGet test_01037.dict_array (34.1684,4.20007) 101 -dictGet test_01037.dict_array (34.1854,3.32089) 101 -dictGet test_01037.dict_array (34.2022,0.749536) 101 -dictGet test_01037.dict_array (34.2044,3.04865) 101 -dictGet test_01037.dict_array (34.22,-0.500055) 101 -dictGet test_01037.dict_array (34.2249,0.743775) 101 -dictGet test_01037.dict_array (34.2254,1.34702) 101 -dictGet test_01037.dict_array (34.2355,-0.898843) 101 -dictGet test_01037.dict_array (34.2394,2.0203699999999998) 101 -dictGet test_01037.dict_array (34.2466,1.83785) 101 -dictGet test_01037.dict_array (34.247,4.09563) 101 -dictGet test_01037.dict_array (34.2508,2.61312) 101 -dictGet test_01037.dict_array (34.2517,1.69642) 101 -dictGet test_01037.dict_array (34.2564,4.13033) 101 -dictGet test_01037.dict_array (34.2574,4.18928) 101 -dictGet test_01037.dict_array (34.2614,-0.478719) 101 -dictGet test_01037.dict_array (34.2625,2.38088) 101 -dictGet test_01037.dict_array (34.2666,3.1503) 101 -dictGet test_01037.dict_array (34.271,4.02223) 101 -dictGet test_01037.dict_array (34.2727,0.514755) 101 -dictGet test_01037.dict_array (34.278,1.98929) 101 -dictGet test_01037.dict_array (34.2798,-0.199208) 101 -dictGet test_01037.dict_array (34.2804,2.05184) 101 -dictGet test_01037.dict_array (34.2945,-1.11051) 101 -dictGet test_01037.dict_array (34.3168,-0.0829721) 101 -dictGet test_01037.dict_array (34.3345,3.4358) 101 -dictGet test_01037.dict_array (34.3377,1.13527) 5940222 -dictGet test_01037.dict_array (34.3383,1.27891) 5940222 -dictGet test_01037.dict_array (34.3391,1.47945) 5940222 -dictGet test_01037.dict_array (34.3441,0.627014) 101 -dictGet test_01037.dict_array (34.347,2.4853) 101 -dictGet test_01037.dict_array (34.3514,2.16247) 101 -dictGet test_01037.dict_array (34.3627,2.64533) 101 -dictGet test_01037.dict_array (34.3682,-0.227501) 101 -dictGet test_01037.dict_array (34.3756,4.21248) 101 -dictGet test_01037.dict_array (34.379,3.96604) 101 -dictGet test_01037.dict_array (34.3827,1.7518) 101 -dictGet test_01037.dict_array (34.3912,2.8834) 101 -dictGet test_01037.dict_array (34.3919,0.668829) 101 -dictGet test_01037.dict_array (34.3949,2.00338) 101 -dictGet test_01037.dict_array (34.3987,0.557268) 101 -dictGet test_01037.dict_array (34.4111,0.768558) 101 -dictGet test_01037.dict_array (34.4119,2.8742) 101 -dictGet test_01037.dict_array (34.416,3.50841) 101 -dictGet test_01037.dict_array (34.4212,1.24916) 5940222 -dictGet test_01037.dict_array (34.4251,0.457029) 101 -dictGet test_01037.dict_array (34.4274,-0.902559) 101 -dictGet test_01037.dict_array (34.4325,4.03159) 101 -dictGet test_01037.dict_array (34.438,1.63994) 101 -dictGet test_01037.dict_array (34.4403,-0.177594) 101 -dictGet test_01037.dict_array (34.4421,0.726712) 101 -dictGet test_01037.dict_array (34.4517,2.98611) 101 -dictGet test_01037.dict_array (34.4658,-1.312) 101 -dictGet test_01037.dict_array (34.4732,-0.0681338) 101 -dictGet test_01037.dict_array (34.4752,2.81646) 101 -dictGet test_01037.dict_array (34.4914,2.3858) 101 -dictGet test_01037.dict_array (34.4923,0.855231) 101 -dictGet test_01037.dict_array (34.5235,1.78468) 101 -dictGet test_01037.dict_array (34.5305,4.10608) 101 -dictGet test_01037.dict_array (34.5389,0.621937) 101 -dictGet test_01037.dict_array (34.5406,3.17145) 101 -dictGet test_01037.dict_array (34.5434,-0.56306) 101 -dictGet test_01037.dict_array (34.5449,3.13311) 101 -dictGet test_01037.dict_array (34.5491,2.31572) 101 -dictGet test_01037.dict_array (34.5539,2.94028) 101 -dictGet test_01037.dict_array (34.5546,-0.208825) 101 -dictGet test_01037.dict_array (34.5549,3.78486) 101 -dictGet test_01037.dict_array (34.5676,0.307148) 101 -dictGet test_01037.dict_array (34.5743,1.5217399999999999) 101 -dictGet test_01037.dict_array (34.5775,3.48046) 101 -dictGet test_01037.dict_array (34.5815,2.5243700000000002) 101 -dictGet test_01037.dict_array (34.5841,4.21191) 101 -dictGet test_01037.dict_array (34.5887,2.65083) 101 -dictGet test_01037.dict_array (34.5937,3.2143) 101 -dictGet test_01037.dict_array (34.6013,-1.0612) 101 -dictGet test_01037.dict_array (34.6089,1.36066) 101 -dictGet test_01037.dict_array (34.6103,3.40227) 101 -dictGet test_01037.dict_array (34.6128,1.92276) 101 -dictGet test_01037.dict_array (34.6175,2.43627) 101 -dictGet test_01037.dict_array (34.6209,3.43776) 101 -dictGet test_01037.dict_array (34.6234,2.60237) 101 -dictGet test_01037.dict_array (34.6275,3.52479) 101 -dictGet test_01037.dict_array (34.635,0.568558) 101 -dictGet test_01037.dict_array (34.6373,2.37692) 101 -dictGet test_01037.dict_array (34.6375,3.52234) 101 -dictGet test_01037.dict_array (34.6426,2.12397) 101 -dictGet test_01037.dict_array (34.6513,2.80915) 101 -dictGet test_01037.dict_array (34.6632,2.30039) 101 -dictGet test_01037.dict_array (34.6691,1.86582) 101 -dictGet test_01037.dict_array (34.6739,0.15342) 101 -dictGet test_01037.dict_array (34.6825,0.0499679) 101 -dictGet test_01037.dict_array (34.6893,0.454326) 101 -dictGet test_01037.dict_array (34.6957,-0.358598) 101 -dictGet test_01037.dict_array (34.6986,0.562679) 101 -dictGet test_01037.dict_array (34.712,1.12114) 101 -dictGet test_01037.dict_array (34.7126,-0.0057301) 101 -dictGet test_01037.dict_array (34.7137,0.0248501) 101 -dictGet test_01037.dict_array (34.7162,1.15623) 101 -dictGet test_01037.dict_array (34.7258,3.95142) 101 -dictGet test_01037.dict_array (34.7347,3.5232099999999997) 101 -dictGet test_01037.dict_array (34.7363,2.23374) 101 -dictGet test_01037.dict_array (34.7375,0.397841) 101 -dictGet test_01037.dict_array (34.7423,3.09198) 101 -dictGet test_01037.dict_array (34.7452,3.09029) 101 -dictGet test_01037.dict_array (34.7539,-1.06943) 101 -dictGet test_01037.dict_array (34.7733,-0.00912717) 101 -dictGet test_01037.dict_array (34.774,2.71088) 101 -dictGet test_01037.dict_array (34.7771,1.46009) 101 -dictGet test_01037.dict_array (34.7782,-1.28308) 101 -dictGet test_01037.dict_array (34.7924,3.63564) 101 -dictGet test_01037.dict_array (34.7939,-0.416676) 101 -dictGet test_01037.dict_array (34.7964,-0.401773) 101 -dictGet test_01037.dict_array (34.7974,0.0286873) 101 -dictGet test_01037.dict_array (34.7975,3.05965) 101 -dictGet test_01037.dict_array (34.8037,3.07263) 101 -dictGet test_01037.dict_array (34.8254,-0.390284) 101 -dictGet test_01037.dict_array (34.828,1.91869) 101 -dictGet test_01037.dict_array (34.8289,3.71058) 101 -dictGet test_01037.dict_array (34.8403,2.14606) 101 -dictGet test_01037.dict_array (34.8437,2.20617) 101 -dictGet test_01037.dict_array (34.8469,2.38435) 101 -dictGet test_01037.dict_array (34.86,1.45705) 101 -dictGet test_01037.dict_array (34.8612,0.914248) 101 -dictGet test_01037.dict_array (34.8663,3.4215400000000002) 101 -dictGet test_01037.dict_array (34.8724,-0.375144) 101 -dictGet test_01037.dict_array (34.8795,3.29317) 101 -dictGet test_01037.dict_array (34.8823,1.21988) 101 -dictGet test_01037.dict_array (34.8834,1.07657) 101 -dictGet test_01037.dict_array (34.8837,0.157648) 101 -dictGet test_01037.dict_array (34.8871,-0.9755) 101 -dictGet test_01037.dict_array (34.8871,1.8943699999999999) 101 -dictGet test_01037.dict_array (34.889,3.36756) 101 -dictGet test_01037.dict_array (34.8907,1.24874) 101 -dictGet test_01037.dict_array (34.8965,3.13508) 101 -dictGet test_01037.dict_array (34.9042,2.62092) 101 -dictGet test_01037.dict_array (34.9055,-0.0448967) 101 -dictGet test_01037.dict_array (34.9122,0.110576) 101 -dictGet test_01037.dict_array (34.9228,3.60183) 101 -dictGet test_01037.dict_array (34.9237,1.21715) 101 -dictGet test_01037.dict_array (34.9296,1.70459) 101 -dictGet test_01037.dict_array (34.941,-1.14663) 101 -dictGet test_01037.dict_array (34.9448,1.18923) 101 -dictGet test_01037.dict_array (34.9462,3.81678) 101 -dictGet test_01037.dict_array (34.9466,0.593463) 101 -dictGet test_01037.dict_array (34.9485,0.150307) 101 -dictGet test_01037.dict_array (34.9542,0.487238) 101 -dictGet test_01037.dict_array (34.9559,2.03473) 101 -dictGet test_01037.dict_array (34.9671,-0.960225) 101 -dictGet test_01037.dict_array (34.9711,2.63444) 101 -dictGet test_01037.dict_array (34.9892,0.354775) 101 -dictGet test_01037.dict_array (34.9907,1.40724) 101 -dictGet test_01037.dict_array (34.9916,-0.00173097) 101 -dictGet test_01037.dict_array (34.9919,2.06167) 101 +dictGet dict_array (29.5699,2.50068) 101 +dictGet dict_array (29.5796,1.55456) 101 +dictGet dict_array (29.5796,2.36864) 101 +dictGet dict_array (29.5844,1.59626) 101 +dictGet dict_array (29.5886,4.03321) 101 +dictGet dict_array (29.5914,3.02628) 101 +dictGet dict_array (29.5926,-0.0965169) 101 +dictGet dict_array (29.5968,2.37773) 101 +dictGet dict_array (29.5984,0.755853) 101 +dictGet dict_array (29.6066,3.47173) 101 +dictGet dict_array (29.6085,-1.26007) 101 +dictGet dict_array (29.6131,0.246565) 101 +dictGet dict_array (29.6157,-0.266687) 101 +dictGet dict_array (29.6164,2.94674) 101 +dictGet dict_array (29.6195,-0.591941) 101 +dictGet dict_array (29.6231,1.54818) 101 +dictGet dict_array (29.6379,0.764114) 101 +dictGet dict_array (29.6462,-0.772059) 934570 +dictGet dict_array (29.6579,-1.07336) 101 +dictGet dict_array (29.6618,-0.271842) 101 +dictGet dict_array (29.6629,-0.303602) 101 +dictGet dict_array (29.6659,-0.782823) 934570 +dictGet dict_array (29.6736,-0.113832) 101 +dictGet dict_array (29.6759,3.02905) 101 +dictGet dict_array (29.6778,3.71898) 101 +dictGet dict_array (29.6796,1.10433) 101 +dictGet dict_array (29.6809,2.13677) 101 +dictGet dict_array (29.6935,4.11894) 101 +dictGet dict_array (29.6991,-1.4458199999999999) 101 +dictGet dict_array (29.6997,3.17297) 101 +dictGet dict_array (29.7043,3.6145899999999997) 101 +dictGet dict_array (29.7065,3.24885) 101 +dictGet dict_array (29.7126,0.28108) 101 +dictGet dict_array (29.7192,0.174273) 101 +dictGet dict_array (29.7217,-0.523481) 934570 +dictGet dict_array (29.7271,1.67967) 101 +dictGet dict_array (29.7311,4.12444) 101 +dictGet dict_array (29.7347,1.88378) 101 +dictGet dict_array (29.7358,0.67944) 101 +dictGet dict_array (29.7366,-0.2973) 101 +dictGet dict_array (29.7446,0.646536) 101 +dictGet dict_array (29.7453,-0.567963) 101 +dictGet dict_array (29.764,4.04217) 101 +dictGet dict_array (29.7655,1.51372) 101 +dictGet dict_array (29.7744,1.12435) 101 +dictGet dict_array (29.7774,-0.0681196) 101 +dictGet dict_array (29.7784,1.54864) 101 +dictGet dict_array (29.7785,2.24139) 101 +dictGet dict_array (29.7922,0.220808) 101 +dictGet dict_array (29.7936,2.37709) 101 +dictGet dict_array (29.8008,0.948536) 101 +dictGet dict_array (29.8115,0.201227) 101 +dictGet dict_array (29.814,0.149601) 101 +dictGet dict_array (29.8193,-1.35858) 101 +dictGet dict_array (29.8201,0.965518) 101 +dictGet dict_array (29.8265,-0.727286) 101 +dictGet dict_array (29.8277,-0.531746) 101 +dictGet dict_array (29.8289,3.63009) 101 +dictGet dict_array (29.8548,0.838047) 101 +dictGet dict_array (29.8641,-0.845265) 101 +dictGet dict_array (29.8649,0.0562212) 101 +dictGet dict_array (29.8701,-1.02045) 101 +dictGet dict_array (29.8733,2.76654) 101 +dictGet dict_array (29.876,0.555475) 101 +dictGet dict_array (29.8794,-0.800108) 101 +dictGet dict_array (29.8813,2.7426399999999997) 101 +dictGet dict_array (29.897100000000002,2.66193) 101 +dictGet dict_array (29.908,4.01339) 101 +dictGet dict_array (29.9165,-1.08246) 101 +dictGet dict_array (29.9201,-0.420861) 101 +dictGet dict_array (29.9217,3.03778) 101 +dictGet dict_array (29.9355,0.773833) 101 +dictGet dict_array (29.947,3.76517) 101 +dictGet dict_array (29.9518,-0.60557) 101 +dictGet dict_array (29.9564,-0.600163) 101 +dictGet dict_array (29.959600000000002,4.16591) 101 +dictGet dict_array (29.9615,-1.33708) 101 +dictGet dict_array (29.9699,-0.392375) 101 +dictGet dict_array (29.9776,1.04552) 101 +dictGet dict_array (29.9784,4.02756) 101 +dictGet dict_array (29.9819,4.00597) 101 +dictGet dict_array (29.9826,1.2816100000000001) 101 +dictGet dict_array (30.0026,2.76257) 101 +dictGet dict_array (30.0126,3.68255) 101 +dictGet dict_array (30.0131,0.796576) 101 +dictGet dict_array (30.018,1.16523) 101 +dictGet dict_array (30.0261,-0.210653) 101 +dictGet dict_array (30.0472,-1.11007) 101 +dictGet dict_array (30.0542,-0.479585) 101 +dictGet dict_array (30.0613,1.6278000000000001) 101 +dictGet dict_array (30.0617,-0.0551152) 101 +dictGet dict_array (30.0637,2.62066) 101 +dictGet dict_array (30.0721,1.6424400000000001) 101 +dictGet dict_array (30.0769,-0.402636) 101 +dictGet dict_array (30.0791,-0.277435) 101 +dictGet dict_array (30.0931,0.0327512) 101 +dictGet dict_array (30.1059,3.52623) 101 +dictGet dict_array (30.1103,0.865466) 101 +dictGet dict_array (30.1115,2.95243) 101 +dictGet dict_array (30.1144,1.71029) 101 +dictGet dict_array (30.1311,-0.864751) 101 +dictGet dict_array (30.1336,-0.851386) 101 +dictGet dict_array (30.1393,3.89901) 101 +dictGet dict_array (30.1456,-0.531898) 101 +dictGet dict_array (30.1492,2.07833) 101 +dictGet dict_array (30.1575,2.43856) 101 +dictGet dict_array (30.1682,1.19771) 101 +dictGet dict_array (30.1716,3.9853300000000003) 101 +dictGet dict_array (30.1849,2.78374) 101 +dictGet dict_array (30.1866,0.65658) 101 +dictGet dict_array (30.1885,1.56943) 101 +dictGet dict_array (30.1959,-1.38202) 101 +dictGet dict_array (30.1999,1.58413) 101 +dictGet dict_array (30.2024,0.713081) 101 +dictGet dict_array (30.2054,0.620143) 101 +dictGet dict_array (30.2091,1.51641) 101 +dictGet dict_array (30.2124,-0.331782) 101 +dictGet dict_array (30.226,3.03527) 101 +dictGet dict_array (30.2261,3.18486) 101 +dictGet dict_array (30.2288,2.48407) 101 +dictGet dict_array (30.2345,3.7462400000000002) 101 +dictGet dict_array (30.2375,0.62046) 101 +dictGet dict_array (30.2425,-0.472914) 101 +dictGet dict_array (30.247,3.95863) 101 +dictGet dict_array (30.2494,-0.305093) 101 +dictGet dict_array (30.2499,2.54337) 101 +dictGet dict_array (30.2606,2.16644) 101 +dictGet dict_array (30.2672,3.94847) 101 +dictGet dict_array (30.2709,-0.136264) 101 +dictGet dict_array (30.2764,1.18654) 101 +dictGet dict_array (30.2765,1.20383) 101 +dictGet dict_array (30.2839,1.05762) 101 +dictGet dict_array (30.286,0.469327) 101 +dictGet dict_array (30.2927,3.1693) 101 +dictGet dict_array (30.2935,3.49854) 101 +dictGet dict_array (30.307,0.312338) 101 +dictGet dict_array (30.3085,1.07791) 101 +dictGet dict_array (30.3139,2.77248) 101 +dictGet dict_array (30.314,0.822823) 101 +dictGet dict_array (30.3227,-0.587351) 101 +dictGet dict_array (30.332,1.00174) 101 +dictGet dict_array (30.3388,0.844148) 101 +dictGet dict_array (30.3485,0.561902) 101 +dictGet dict_array (30.3497,0.180362) 101 +dictGet dict_array (30.361,4.13016) 101 +dictGet dict_array (30.3623,-0.0484027) 101 +dictGet dict_array (30.3638,3.9845800000000002) 101 +dictGet dict_array (30.3853,3.16051) 101 +dictGet dict_array (30.3974,2.6617800000000003) 101 +dictGet dict_array (30.4002,-1.15886) 101 +dictGet dict_array (30.4008,-0.387015) 101 +dictGet dict_array (30.4018,1.86493) 101 +dictGet dict_array (30.4239,1.16818) 101 +dictGet dict_array (30.4363,3.63938) 101 +dictGet dict_array (30.4377,-0.81315) 101 +dictGet dict_array (30.4391,3.54703) 101 +dictGet dict_array (30.4424,-1.39435) 101 +dictGet dict_array (30.4441,2.8463000000000003) 101 +dictGet dict_array (30.4517,3.28117) 101 +dictGet dict_array (30.4658,2.6928) 101 +dictGet dict_array (30.4734,2.66161) 101 +dictGet dict_array (30.4799,-1.07578) 101 +dictGet dict_array (30.4837,-1.02486) 101 +dictGet dict_array (30.485,1.06326) 101 +dictGet dict_array (30.495,1.12306) 101 +dictGet dict_array (30.501,2.27264) 101 +dictGet dict_array (30.5027,1.99382) 101 +dictGet dict_array (30.5194,-1.03943) 101 +dictGet dict_array (30.5239,1.04328) 101 +dictGet dict_array (30.528,3.82041) 101 +dictGet dict_array (30.5299,-0.715248) 101 +dictGet dict_array (30.5331,1.19603) 101 +dictGet dict_array (30.535800000000002,2.71485) 101 +dictGet dict_array (30.5405,0.804694) 101 +dictGet dict_array (30.542,1.23739) 101 +dictGet dict_array (30.5432,4.04189) 101 +dictGet dict_array (30.5457,-0.956121) 101 +dictGet dict_array (30.5506,3.07443) 101 +dictGet dict_array (30.5539,3.87084) 101 +dictGet dict_array (30.5578,3.78837) 101 +dictGet dict_array (30.5588,0.966135) 101 +dictGet dict_array (30.5637,2.5605) 101 +dictGet dict_array (30.5647,-1.27328) 101 +dictGet dict_array (30.5656,-0.0581332) 101 +dictGet dict_array (30.5715,0.65755) 101 +dictGet dict_array (30.5727,3.01604) 101 +dictGet dict_array (30.5729,-0.976857) 101 +dictGet dict_array (30.5751,0.60204) 101 +dictGet dict_array (30.5854,3.02473) 101 +dictGet dict_array (30.5866,0.174099) 101 +dictGet dict_array (30.5947,0.875193) 101 +dictGet dict_array (30.5992,-0.403901) 101 +dictGet dict_array (30.6002,4.18891) 101 +dictGet dict_array (30.6025,0.217712) 101 +dictGet dict_array (30.6054,0.927203) 101 +dictGet dict_array (30.6075,3.79359) 101 +dictGet dict_array (30.6159,3.82773) 101 +dictGet dict_array (30.627,3.84039) 101 +dictGet dict_array (30.6308,0.77517) 101 +dictGet dict_array (30.6338,0.179565) 101 +dictGet dict_array (30.6461,1.3293599999999999) 101 +dictGet dict_array (30.6674,-0.424547) 101 +dictGet dict_array (30.669,1.76539) 101 +dictGet dict_array (30.6788,4.01239) 101 +dictGet dict_array (30.6864,3.59158) 101 +dictGet dict_array (30.7049,-0.875413) 101 +dictGet dict_array (30.705,1.3307) 101 +dictGet dict_array (30.7063,-0.473192) 101 +dictGet dict_array (30.7075,-1.1958199999999999) 101 +dictGet dict_array (30.7101,-0.367562) 101 +dictGet dict_array (30.7203,2.98725) 101 +dictGet dict_array (30.7213,2.2745699999999998) 101 +dictGet dict_array (30.7446,-0.334144) 101 +dictGet dict_array (30.7468,3.82967) 101 +dictGet dict_array (30.747,-0.384779) 101 +dictGet dict_array (30.7681,0.904198) 101 +dictGet dict_array (30.7757,1.78743) 101 +dictGet dict_array (30.8021,-0.479212) 101 +dictGet dict_array (30.8079,-1.40869) 101 +dictGet dict_array (30.8206,-0.0608489) 101 +dictGet dict_array (30.8218,0.43909) 101 +dictGet dict_array (30.8239,0.10014) 101 +dictGet dict_array (30.8282,4.15409) 101 +dictGet dict_array (30.8288,-0.709528) 101 +dictGet dict_array (30.8326,0.156011) 101 +dictGet dict_array (30.8328,-1.03704) 101 +dictGet dict_array (30.839,2.15528) 101 +dictGet dict_array (30.8452,0.219377) 101 +dictGet dict_array (30.8463,0.0515355) 101 +dictGet dict_array (30.8526,2.06614) 101 +dictGet dict_array (30.8566,0.517876) 101 +dictGet dict_array (30.8588,-1.31738) 101 +dictGet dict_array (30.8681,0.44207) 101 +dictGet dict_array (30.8914,1.0072) 101 +dictGet dict_array (30.897,0.483425) 101 +dictGet dict_array (30.905,2.8731999999999998) 101 +dictGet dict_array (30.9051,2.21956) 101 +dictGet dict_array (30.9115,4.00663) 101 +dictGet dict_array (30.9167,-0.834462) 101 +dictGet dict_array (30.9252,-1.3289900000000001) 101 +dictGet dict_array (30.9314,1.85384) 101 +dictGet dict_array (30.9392,2.53236) 101 +dictGet dict_array (30.9569,2.82038) 101 +dictGet dict_array (30.9598,-0.641011) 101 +dictGet dict_array (30.9601,-0.254928) 101 +dictGet dict_array (30.9623,-1.3886) 101 +dictGet dict_array (30.9707,0.888854) 101 +dictGet dict_array (30.9766,2.81957) 101 +dictGet dict_array (30.9775,2.69273) 101 +dictGet dict_array (30.9821,0.587715) 101 +dictGet dict_array (30.9887,4.0233) 101 +dictGet dict_array (30.9914,0.259542) 101 +dictGet dict_array (30.9986,-1.36832) 101 +dictGet dict_array (31.008,0.628999) 101 +dictGet dict_array (31.0168,-1.17462) 101 +dictGet dict_array (31.0237,3.52547) 101 +dictGet dict_array (31.0306,3.78522) 101 +dictGet dict_array (31.0308,-0.72453) 101 +dictGet dict_array (31.0463,2.41997) 101 +dictGet dict_array (31.047,0.624184) 101 +dictGet dict_array (31.0569,0.0706393) 5994232 +dictGet dict_array (31.0583,1.3244099999999999) 101 +dictGet dict_array (31.063,3.23861) 101 +dictGet dict_array (31.068,0.695575) 101 +dictGet dict_array (31.0687,1.85675) 101 +dictGet dict_array (31.0692,0.254793) 101 +dictGet dict_array (31.0766,0.828128) 101 +dictGet dict_array (31.0833,0.0612782) 5994232 +dictGet dict_array (31.0833,2.59748) 101 +dictGet dict_array (31.0861,-1.3778299999999999) 101 +dictGet dict_array (31.0874,3.07258) 101 +dictGet dict_array (31.0882,1.4882) 101 +dictGet dict_array (31.0924,3.42242) 101 +dictGet dict_array (31.0927,2.67448) 101 +dictGet dict_array (31.0936,1.12292) 101 +dictGet dict_array (31.0952,-0.336928) 101 +dictGet dict_array (31.0978,3.48482) 101 +dictGet dict_array (31.1107,3.7513199999999998) 101 +dictGet dict_array (31.1156,1.19171) 101 +dictGet dict_array (31.1176,0.223509) 5994232 +dictGet dict_array (31.1249,0.946838) 101 +dictGet dict_array (31.1267,1.48983) 101 +dictGet dict_array (31.138,-0.289981) 101 +dictGet dict_array (31.1382,3.02904) 101 +dictGet dict_array (31.1475,2.6178) 101 +dictGet dict_array (31.1491,1.37873) 101 +dictGet dict_array (31.1525,3.72105) 101 +dictGet dict_array (31.1526,-1.4129800000000001) 101 +dictGet dict_array (31.1526,-0.186457) 101 +dictGet dict_array (31.1539,2.78789) 101 +dictGet dict_array (31.1548,-1.08552) 101 +dictGet dict_array (31.1567,-0.0768925) 101 +dictGet dict_array (31.1613,1.49617) 101 +dictGet dict_array (31.1653,1.03777) 101 +dictGet dict_array (31.1662,3.4214700000000002) 101 +dictGet dict_array (31.1672,-0.0813169) 101 +dictGet dict_array (31.177,0.440843) 101 +dictGet dict_array (31.1788,-0.737151) 101 +dictGet dict_array (31.1856,-0.144396) 101 +dictGet dict_array (31.1959,3.66813) 101 +dictGet dict_array (31.1996,-0.353983) 101 +dictGet dict_array (31.2019,2.86802) 101 +dictGet dict_array (31.2087,2.31245) 101 +dictGet dict_array (31.2125,3.2713200000000002) 101 +dictGet dict_array (31.2137,-0.108129) 101 +dictGet dict_array (31.216,3.9156) 101 +dictGet dict_array (31.2201,-0.202141) 101 +dictGet dict_array (31.2285,2.09058) 101 +dictGet dict_array (31.2502,4.01526) 101 +dictGet dict_array (31.2585,3.11524) 101 +dictGet dict_array (31.2645,-0.620418) 101 +dictGet dict_array (31.2684,2.74277) 101 +dictGet dict_array (31.2821,-1.12772) 101 +dictGet dict_array (31.2821,2.46769) 101 +dictGet dict_array (31.2887,3.91396) 101 +dictGet dict_array (31.295,1.49942) 101 +dictGet dict_array (31.2997,3.46122) 101 +dictGet dict_array (31.3017,3.3263) 101 +dictGet dict_array (31.3022,3.16754) 101 +dictGet dict_array (31.3048,0.364962) 101 +dictGet dict_array (31.305,3.1967) 101 +dictGet dict_array (31.3061,1.84303) 101 +dictGet dict_array (31.3082,-0.173851) 101 +dictGet dict_array (31.3315,3.90932) 101 +dictGet dict_array (31.3351,2.80164) 101 +dictGet dict_array (31.3388,0.168765) 5994233 +dictGet dict_array (31.339,0.25535) 101 +dictGet dict_array (31.3423,1.7036799999999999) 101 +dictGet dict_array (31.349,0.386456) 101 +dictGet dict_array (31.3558,-1.04336) 101 +dictGet dict_array (31.3564,0.478876) 101 +dictGet dict_array (31.3607,-0.0860507) 5994233 +dictGet dict_array (31.3831,3.84469) 101 +dictGet dict_array (31.3886,-0.731137) 101 +dictGet dict_array (31.4043,-0.348907) 101 +dictGet dict_array (31.4081,1.47391) 101 +dictGet dict_array (31.4176,-0.583645) 101 +dictGet dict_array (31.4177,1.36972) 101 +dictGet dict_array (31.4182,0.958303) 101 +dictGet dict_array (31.4199,3.1738) 101 +dictGet dict_array (31.4221,2.74876) 101 +dictGet dict_array (31.4301,-0.122643) 5994233 +dictGet dict_array (31.4344,1.00661) 101 +dictGet dict_array (31.4375,4.20304) 101 +dictGet dict_array (31.4377,0.289608) 101 +dictGet dict_array (31.4379,0.54744) 101 +dictGet dict_array (31.4459,3.94945) 101 +dictGet dict_array (31.4559,-0.345063) 101 +dictGet dict_array (31.464,0.726129) 101 +dictGet dict_array (31.4662,-0.299019) 5994233 +dictGet dict_array (31.4671,1.9605299999999999) 101 +dictGet dict_array (31.4673,-0.403676) 101 +dictGet dict_array (31.4712,-0.237941) 5994233 +dictGet dict_array (31.4816,0.120264) 5994233 +dictGet dict_array (31.4875,0.323483) 101 +dictGet dict_array (31.490099999999998,-0.338163) 101 +dictGet dict_array (31.4932,0.517674) 101 +dictGet dict_array (31.5112,1.9689299999999998) 101 +dictGet dict_array (31.5122,2.92785) 101 +dictGet dict_array (31.5151,0.166429) 101 +dictGet dict_array (31.5174,2.94802) 101 +dictGet dict_array (31.5182,4.18776) 101 +dictGet dict_array (31.5238,1.18793) 101 +dictGet dict_array (31.5271,3.07446) 101 +dictGet dict_array (31.5393,1.58061) 101 +dictGet dict_array (31.5421,3.13711) 101 +dictGet dict_array (31.5479,2.39897) 101 +dictGet dict_array (31.5519,0.99285) 101 +dictGet dict_array (31.5685,3.47987) 101 +dictGet dict_array (31.5959,0.437382) 101 +dictGet dict_array (31.6003,0.194376) 101 +dictGet dict_array (31.6026,2.15457) 101 +dictGet dict_array (31.606,2.45365) 101 +dictGet dict_array (31.6062,-0.453441) 101 +dictGet dict_array (31.6107,1.35247) 101 +dictGet dict_array (31.6155,3.85588) 101 +dictGet dict_array (31.6222,2.03326) 101 +dictGet dict_array (31.6231,-0.123059) 101 +dictGet dict_array (31.6244,1.6885599999999998) 101 +dictGet dict_array (31.6459,0.669716) 101 +dictGet dict_array (31.6563,-0.0644741) 101 +dictGet dict_array (31.6618,-0.551121) 101 +dictGet dict_array (31.6725,-0.38922) 101 +dictGet dict_array (31.6727,4.10336) 101 +dictGet dict_array (31.6739,4.1391) 101 +dictGet dict_array (31.6897,2.8694699999999997) 101 +dictGet dict_array (31.6902,3.98792) 101 +dictGet dict_array (31.6945,2.46687) 101 +dictGet dict_array (31.6987,-1.3796) 101 +dictGet dict_array (31.7012,2.34845) 101 +dictGet dict_array (31.7036,0.0228348) 101 +dictGet dict_array (31.7046,3.68111) 101 +dictGet dict_array (31.7055,2.92556) 101 +dictGet dict_array (31.7102,1.04532) 101 +dictGet dict_array (31.7149,-0.443302) 101 +dictGet dict_array (31.7195,2.99311) 101 +dictGet dict_array (31.7274,0.166719) 101 +dictGet dict_array (31.7565,-0.565382) 101 +dictGet dict_array (31.7615,0.771626) 101 +dictGet dict_array (31.7739,1.8970099999999999) 101 +dictGet dict_array (31.7848,1.2623199999999999) 101 +dictGet dict_array (31.7912,-0.788599) 101 +dictGet dict_array (31.8011,2.65853) 101 +dictGet dict_array (31.8032,-0.0590108) 101 +dictGet dict_array (31.8038,1.9618799999999998) 101 +dictGet dict_array (31.8098,-1.46851) 101 +dictGet dict_array (31.8131,3.41982) 101 +dictGet dict_array (31.8169,3.31059) 101 +dictGet dict_array (31.8202,-0.193692) 101 +dictGet dict_array (31.8306,1.57586) 101 +dictGet dict_array (31.8382,-0.787948) 101 +dictGet dict_array (31.8433,2.49692) 101 +dictGet dict_array (31.8436,2.41851) 101 +dictGet dict_array (31.8563,-1.10787) 101 +dictGet dict_array (31.8683,0.996504) 101 +dictGet dict_array (31.8693,-0.828142) 101 +dictGet dict_array (31.8723,1.08929) 101 +dictGet dict_array (31.8737,0.881127) 101 +dictGet dict_array (31.8881,-0.58441) 101 +dictGet dict_array (31.9011,0.121349) 101 +dictGet dict_array (31.9066,2.13045) 101 +dictGet dict_array (31.9142,1.03368) 101 +dictGet dict_array (31.9155,3.38363) 101 +dictGet dict_array (31.9168,1.3166) 101 +dictGet dict_array (31.9185,-1.11879) 101 +dictGet dict_array (31.9186,-0.647948) 101 +dictGet dict_array (31.9311,3.96928) 101 +dictGet dict_array (31.9335,1.47048) 101 +dictGet dict_array (31.9443,-1.36175) 101 +dictGet dict_array (31.9481,2.34231) 101 +dictGet dict_array (31.9526,1.36565) 101 +dictGet dict_array (31.9629,2.5208399999999997) 101 +dictGet dict_array (31.9765,0.975783) 101 +dictGet dict_array (31.9923,3.31773) 101 +dictGet dict_array (31.9994,0.972816) 101 +dictGet dict_array (32.001,3.47425) 101 +dictGet dict_array (32.0127,2.13874) 101 +dictGet dict_array (32.0244,3.2092) 101 +dictGet dict_array (32.029,1.18039) 101 +dictGet dict_array (32.0315,0.566073) 101 +dictGet dict_array (32.0354,1.0766499999999999) 101 +dictGet dict_array (32.0399,-1.11576) 101 +dictGet dict_array (32.053,2.16849) 101 +dictGet dict_array (32.0542,0.042328) 101 +dictGet dict_array (32.0576,2.47001) 101 +dictGet dict_array (32.061,3.7498899999999997) 101 +dictGet dict_array (32.0623,1.25134) 101 +dictGet dict_array (32.0626,1.9611399999999999) 101 +dictGet dict_array (32.0666,-0.0904247) 101 +dictGet dict_array (32.0681,2.28442) 101 +dictGet dict_array (32.0692,1.50869) 101 +dictGet dict_array (32.0724,4.03314) 101 +dictGet dict_array (32.0729,-0.064324) 101 +dictGet dict_array (32.079,0.293758) 101 +dictGet dict_array (32.0847,-1.19814) 101 +dictGet dict_array (32.0974,-0.91927) 101 +dictGet dict_array (32.0979,-0.736979) 101 +dictGet dict_array (32.106,-1.33063) 101 +dictGet dict_array (32.1189,0.246715) 101 +dictGet dict_array (32.1207,4.00883) 101 +dictGet dict_array (32.1396,1.12402) 101 +dictGet dict_array (32.1413,1.5668) 101 +dictGet dict_array (32.143,1.35559) 101 +dictGet dict_array (32.1538,1.32881) 101 +dictGet dict_array (32.1549,4.06552) 101 +dictGet dict_array (32.1555,-0.79275) 101 +dictGet dict_array (32.163,1.17733) 101 +dictGet dict_array (32.1634,2.94273) 101 +dictGet dict_array (32.1644,1.85666) 101 +dictGet dict_array (32.1745,0.435458) 101 +dictGet dict_array (32.1765,1.65149) 101 +dictGet dict_array (32.1893,2.08924) 101 +dictGet dict_array (32.2024,0.222191) 101 +dictGet dict_array (32.2107,1.34379) 101 +dictGet dict_array (32.2109,3.9018699999999997) 101 +dictGet dict_array (32.2123,1.85233) 101 +dictGet dict_array (32.2144,3.72534) 101 +dictGet dict_array (32.2218,2.5386699999999998) 101 +dictGet dict_array (32.2279,2.84267) 101 +dictGet dict_array (32.2345,3.33295) 101 +dictGet dict_array (32.2435,3.85283) 101 +dictGet dict_array (32.2527,-0.480608) 101 +dictGet dict_array (32.2566,-0.837882) 101 +dictGet dict_array (32.2627,2.57708) 101 +dictGet dict_array (32.2733,0.244931) 101 +dictGet dict_array (32.2761,4.05808) 101 +dictGet dict_array (32.2764,3.78472) 101 +dictGet dict_array (32.2814,-1.26011) 101 +dictGet dict_array (32.2861,3.02427) 101 +dictGet dict_array (32.2924,0.928609) 101 +dictGet dict_array (32.2963,-0.78543) 101 +dictGet dict_array (32.3039,3.21175) 101 +dictGet dict_array (32.3107,0.698287) 101 +dictGet dict_array (32.3138,0.0595677) 101 +dictGet dict_array (32.3339,0.707056) 101 +dictGet dict_array (32.3351,0.415474) 101 +dictGet dict_array (32.342,-0.681023) 101 +dictGet dict_array (32.3463,1.83196) 101 +dictGet dict_array (32.3494,2.43799) 101 +dictGet dict_array (32.3524,3.47049) 101 +dictGet dict_array (32.3531,2.33115) 101 +dictGet dict_array (32.3602,0.116106) 101 +dictGet dict_array (32.3612,1.1598) 101 +dictGet dict_array (32.3689,3.34847) 101 +dictGet dict_array (32.3695,0.734055) 101 +dictGet dict_array (32.3825,3.85017) 101 +dictGet dict_array (32.3835,-1.25491) 101 +dictGet dict_array (32.4018,-0.728568) 101 +dictGet dict_array (32.4044,2.96727) 101 +dictGet dict_array (32.4101,2.9988) 101 +dictGet dict_array (32.417,-1.12908) 101 +dictGet dict_array (32.4172,4.1952) 101 +dictGet dict_array (32.4239,2.49512) 101 +dictGet dict_array (32.4258,4.05137) 101 +dictGet dict_array (32.4264,-0.427357) 101 +dictGet dict_array (32.4274,3.59377) 101 +dictGet dict_array (32.4286,-1.24757) 101 +dictGet dict_array (32.4294,3.0665) 101 +dictGet dict_array (32.4333,-0.353347) 101 +dictGet dict_array (32.4391,3.64421) 101 +dictGet dict_array (32.4401,3.70635) 101 +dictGet dict_array (32.45,1.68918) 101 +dictGet dict_array (32.4507,-0.133471) 101 +dictGet dict_array (32.4592,0.976458) 101 +dictGet dict_array (32.4595,1.89135) 101 +dictGet dict_array (32.4604,0.280248) 101 +dictGet dict_array (32.4835,0.472731) 101 +dictGet dict_array (32.4855,2.01938) 101 +dictGet dict_array (32.4872,2.01697) 101 +dictGet dict_array (32.4911,0.613106) 101 +dictGet dict_array (32.4918,2.17834) 101 +dictGet dict_array (32.4947,2.34595) 101 +dictGet dict_array (32.5035,2.92234) 101 +dictGet dict_array (32.5132,-0.331206) 101 +dictGet dict_array (32.5156,-0.412604) 7652581 +dictGet dict_array (32.5158,2.9067499999999997) 101 +dictGet dict_array (32.5249,2.44519) 101 +dictGet dict_array (32.5293,-0.790952) 101 +dictGet dict_array (32.5319,3.96854) 101 +dictGet dict_array (32.5518,3.6093) 101 +dictGet dict_array (32.5541,3.5225400000000002) 101 +dictGet dict_array (32.5569,0.816123) 101 +dictGet dict_array (32.5646,1.9775) 101 +dictGet dict_array (32.5733,3.81271) 101 +dictGet dict_array (32.5767,0.948327) 101 +dictGet dict_array (32.5971,1.76179) 101 +dictGet dict_array (32.6035,-0.716157) 101 +dictGet dict_array (32.6087,4.21614) 101 +dictGet dict_array (32.6171,0.024481) 101 +dictGet dict_array (32.6189,-0.775391) 101 +dictGet dict_array (32.6198,2.92081) 101 +dictGet dict_array (32.621,-0.970784) 101 +dictGet dict_array (32.6266,0.650009) 101 +dictGet dict_array (32.6315,2.15144) 101 +dictGet dict_array (32.6385,-0.436803) 101 +dictGet dict_array (32.6449,-0.191292) 101 +dictGet dict_array (32.6535,2.10385) 101 +dictGet dict_array (32.6592,3.49973) 101 +dictGet dict_array (32.6598,2.5980600000000003) 101 +dictGet dict_array (32.6612,2.95681) 101 +dictGet dict_array (32.6636,-0.57235) 101 +dictGet dict_array (32.669,-0.382702) 101 +dictGet dict_array (32.6752,1.30748) 101 +dictGet dict_array (32.6811,2.9559800000000003) 101 +dictGet dict_array (32.6821,0.57336) 101 +dictGet dict_array (32.6828,3.91304) 101 +dictGet dict_array (32.6979,3.96868) 101 +dictGet dict_array (32.6983,3.15784) 101 +dictGet dict_array (32.7122,0.794293) 101 +dictGet dict_array (32.7131,-0.847256) 101 +dictGet dict_array (32.7219,0.883461) 101 +dictGet dict_array (32.7228,1.78808) 101 +dictGet dict_array (32.7273,-0.206908) 101 +dictGet dict_array (32.7292,0.259331) 101 +dictGet dict_array (32.7304,-1.38317) 101 +dictGet dict_array (32.7353,1.01601) 101 +dictGet dict_array (32.7354,4.17574) 101 +dictGet dict_array (32.7357,-0.190194) 101 +dictGet dict_array (32.7465,-1.37598) 101 +dictGet dict_array (32.7494,-0.275675) 101 +dictGet dict_array (32.7514,0.128951) 101 +dictGet dict_array (32.753,3.44207) 101 +dictGet dict_array (32.7686,2.11713) 101 +dictGet dict_array (32.7694,1.47159) 101 +dictGet dict_array (32.7768,0.0401042) 101 +dictGet dict_array (32.781,-1.34283) 101 +dictGet dict_array (32.7814,1.73876) 101 +dictGet dict_array (32.7856,-1.06363) 101 +dictGet dict_array (32.792699999999996,-1.1255600000000001) 101 +dictGet dict_array (32.7941,-0.645447) 101 +dictGet dict_array (32.7946,1.48889) 101 +dictGet dict_array (32.797,0.791753) 101 +dictGet dict_array (32.7982,-0.537798) 101 +dictGet dict_array (32.8091,2.3611) 101 +dictGet dict_array (32.81,1.7130800000000002) 101 +dictGet dict_array (32.8174,-0.288322) 101 +dictGet dict_array (32.823,1.6546699999999999) 101 +dictGet dict_array (32.8233,1.62108) 101 +dictGet dict_array (32.8428,-0.400045) 101 +dictGet dict_array (32.8479,2.13598) 101 +dictGet dict_array (32.8524,0.199902) 101 +dictGet dict_array (32.8543,3.23553) 101 +dictGet dict_array (32.8562,1.31371) 101 +dictGet dict_array (32.87,1.44256) 101 +dictGet dict_array (32.8789,2.38192) 101 +dictGet dict_array (32.8812,2.20734) 5999168 +dictGet dict_array (32.8815,-0.54427) 101 +dictGet dict_array (32.8853,2.4859) 5999168 +dictGet dict_array (32.8909,0.513964) 101 +dictGet dict_array (32.9035,2.38999) 101 +dictGet dict_array (32.9097,2.48131) 5999168 +dictGet dict_array (32.928,-0.943269) 101 +dictGet dict_array (32.9322,1.13165) 101 +dictGet dict_array (32.9348,1.22606) 101 +dictGet dict_array (32.9417,3.77998) 101 +dictGet dict_array (32.9428,3.11936) 101 +dictGet dict_array (32.9482,1.18092) 101 +dictGet dict_array (32.9506,0.0609364) 101 +dictGet dict_array (32.953,-0.828308) 101 +dictGet dict_array (32.9593,3.5209099999999998) 101 +dictGet dict_array (32.9617,2.07711) 5999168 +dictGet dict_array (32.966,0.693749) 101 +dictGet dict_array (32.9668,-0.716432) 101 +dictGet dict_array (32.9702,1.98555) 101 +dictGet dict_array (32.9782,1.73819) 101 +dictGet dict_array (32.9805,3.71151) 101 +dictGet dict_array (32.9821,2.97225) 101 +dictGet dict_array (32.995,-0.830301) 101 +dictGet dict_array (33.0234,0.770848) 101 +dictGet dict_array (33.0312,-0.340964) 101 +dictGet dict_array (33.0366,-0.756795) 101 +dictGet dict_array (33.0438,0.812871) 101 +dictGet dict_array (33.0455,1.84843) 101 +dictGet dict_array (33.0498,0.0913292) 101 +dictGet dict_array (33.0506,1.53739) 101 +dictGet dict_array (33.0554,2.4265) 101 +dictGet dict_array (33.0741,3.61332) 101 +dictGet dict_array (33.0765,-0.179985) 101 +dictGet dict_array (33.087,1.46465) 101 +dictGet dict_array (33.0906,-0.620383) 101 +dictGet dict_array (33.1047,-1.28027) 101 +dictGet dict_array (33.1072,1.96303) 101 +dictGet dict_array (33.1081,-0.897874) 101 +dictGet dict_array (33.1122,1.8950200000000001) 101 +dictGet dict_array (33.1237,2.63993) 101 +dictGet dict_array (33.1238,0.753963) 101 +dictGet dict_array (33.1257,0.495668) 101 +dictGet dict_array (33.1258,1.78341) 101 +dictGet dict_array (33.127,2.59646) 101 +dictGet dict_array (33.1324,-1.23742) 101 +dictGet dict_array (33.1359,3.83491) 101 +dictGet dict_array (33.1628,-0.379588) 101 +dictGet dict_array (33.1679,1.25601) 101 +dictGet dict_array (33.1688,-1.35553) 101 +dictGet dict_array (33.181,2.10943) 101 +dictGet dict_array (33.1871,2.81171) 101 +dictGet dict_array (33.1877,0.771297) 101 +dictGet dict_array (33.1883,-0.204797) 101 +dictGet dict_array (33.1886,3.27998) 101 +dictGet dict_array (33.1955,0.708907) 101 +dictGet dict_array (33.2044,-0.769275) 101 +dictGet dict_array (33.2182,3.36103) 101 +dictGet dict_array (33.2192,3.43586) 101 +dictGet dict_array (33.2322,-0.916753) 101 +dictGet dict_array (33.2359,-0.81321) 101 +dictGet dict_array (33.238,0.635072) 101 +dictGet dict_array (33.2398,3.02588) 101 +dictGet dict_array (33.2469,2.35698) 101 +dictGet dict_array (33.247,2.3327) 101 +dictGet dict_array (33.2579,2.8027100000000003) 101 +dictGet dict_array (33.2607,0.321082) 101 +dictGet dict_array (33.2653,0.243336) 101 +dictGet dict_array (33.2758,0.831836) 101 +dictGet dict_array (33.2771,0.886536) 101 +dictGet dict_array (33.2914,1.16026) 101 +dictGet dict_array (33.2914,1.38882) 101 +dictGet dict_array (33.2982,-1.16604) 101 +dictGet dict_array (33.2985,0.842556) 101 +dictGet dict_array (33.3005,2.8338900000000002) 101 +dictGet dict_array (33.305,0.0969475) 101 +dictGet dict_array (33.3072,3.82163) 101 +dictGet dict_array (33.312,3.41475) 101 +dictGet dict_array (33.3129,2.46048) 101 +dictGet dict_array (33.3134,3.46863) 101 +dictGet dict_array (33.3203,2.33139) 101 +dictGet dict_array (33.324,0.433701) 101 +dictGet dict_array (33.3338,2.44705) 101 +dictGet dict_array (33.337,4.06475) 101 +dictGet dict_array (33.3469,1.08172) 101 +dictGet dict_array (33.3538,0.717896) 101 +dictGet dict_array (33.3618,1.37899) 101 +dictGet dict_array (33.3698,0.547744) 101 +dictGet dict_array (33.3705,0.957619) 101 +dictGet dict_array (33.3821,3.07258) 101 +dictGet dict_array (33.3881,3.0626) 101 +dictGet dict_array (33.393,-0.816186) 101 +dictGet dict_array (33.3945,0.869508) 101 +dictGet dict_array (33.4001,1.24186) 101 +dictGet dict_array (33.4008,2.34911) 101 +dictGet dict_array (33.4166,-1.2808899999999999) 101 +dictGet dict_array (33.4167,3.0655) 101 +dictGet dict_array (33.4204,2.81887) 101 +dictGet dict_array (33.4211,1.71128) 101 +dictGet dict_array (33.4237,2.91761) 101 +dictGet dict_array (33.4266,1.5955599999999999) 101 +dictGet dict_array (33.4353,-0.391392) 101 +dictGet dict_array (33.4362,-0.134658) 101 +dictGet dict_array (33.4386,0.15396) 101 +dictGet dict_array (33.4421,-0.50712) 101 +dictGet dict_array (33.452,0.915829) 101 +dictGet dict_array (33.463,-0.0882717) 101 +dictGet dict_array (33.464,-1.00949) 101 +dictGet dict_array (33.4692,0.954092) 101 +dictGet dict_array (33.4716,1.9538799999999998) 101 +dictGet dict_array (33.4756,1.85836) 101 +dictGet dict_array (33.4859,4.0751) 101 +dictGet dict_array (33.4899,3.54193) 101 +dictGet dict_array (33.4935,3.49794) 101 +dictGet dict_array (33.494,-0.983356) 101 +dictGet dict_array (33.4955,-1.28128) 101 +dictGet dict_array (33.4965,-0.278687) 101 +dictGet dict_array (33.4991,0.647491) 101 +dictGet dict_array (33.5076,2.2272) 101 +dictGet dict_array (33.5079,-0.498199) 101 +dictGet dict_array (33.5157,0.535034) 101 +dictGet dict_array (33.5171,2.49677) 101 +dictGet dict_array (33.5255,2.4447200000000002) 101 +dictGet dict_array (33.526,4.01194) 101 +dictGet dict_array (33.5288,0.789434) 101 +dictGet dict_array (33.5356,-1.17671) 101 +dictGet dict_array (33.5402,1.49152) 101 +dictGet dict_array (33.5418,3.45757) 101 +dictGet dict_array (33.5428,1.90712) 101 +dictGet dict_array (33.5556,-0.55741) 101 +dictGet dict_array (33.5564,0.876858) 101 +dictGet dict_array (33.5567,-0.10208) 101 +dictGet dict_array (33.5645,-0.124824) 101 +dictGet dict_array (33.5663,3.4872) 101 +dictGet dict_array (33.5716,-0.0107611) 101 +dictGet dict_array (33.578,3.55714) 101 +dictGet dict_array (33.5826,-0.49076) 101 +dictGet dict_array (33.5909,0.773737) 101 +dictGet dict_array (33.5958,2.9619999999999997) 5994231 +dictGet dict_array (33.6193,-0.919755) 101 +dictGet dict_array (33.6313,0.652132) 101 +dictGet dict_array (33.632,0.823351) 101 +dictGet dict_array (33.66,2.18998) 101 +dictGet dict_array (33.6621,0.535395) 101 +dictGet dict_array (33.6726,3.19367) 101 +dictGet dict_array (33.6912,1.74522) 101 +dictGet dict_array (33.705,0.706397) 101 +dictGet dict_array (33.7076,0.7622) 101 +dictGet dict_array (33.7112,1.70187) 101 +dictGet dict_array (33.7246,-1.14837) 101 +dictGet dict_array (33.7326,2.62413) 5994231 +dictGet dict_array (33.7332,2.82137) 5994231 +dictGet dict_array (33.7434,0.394672) 101 +dictGet dict_array (33.7443,1.54557) 101 +dictGet dict_array (33.7506,1.57317) 101 +dictGet dict_array (33.7526,1.8578999999999999) 101 +dictGet dict_array (33.766,4.15013) 101 +dictGet dict_array (33.7834,2.41789) 101 +dictGet dict_array (33.7864,0.230935) 101 +dictGet dict_array (33.7965,3.05709) 101 +dictGet dict_array (33.7998,3.32881) 101 +dictGet dict_array (33.8003,2.97338) 5994231 +dictGet dict_array (33.8007,-1.08962) 101 +dictGet dict_array (33.8022,-0.139488) 101 +dictGet dict_array (33.8065,2.70857) 5994231 +dictGet dict_array (33.8169,-0.607788) 101 +dictGet dict_array (33.8203,0.108512) 101 +dictGet dict_array (33.8231,-1.03449) 101 +dictGet dict_array (33.8312,3.49458) 101 +dictGet dict_array (33.8342,0.297518) 101 +dictGet dict_array (33.8352,0.165872) 101 +dictGet dict_array (33.8354,1.87277) 101 +dictGet dict_array (33.8371,1.60103) 101 +dictGet dict_array (33.8387,1.9968) 101 +dictGet dict_array (33.8403,3.5805) 101 +dictGet dict_array (33.8414,-0.703067) 101 +dictGet dict_array (33.844,-0.179472) 101 +dictGet dict_array (33.8468,3.40137) 101 +dictGet dict_array (33.8509,4.15334) 101 +dictGet dict_array (33.8539,2.38339) 101 +dictGet dict_array (33.858,-1.3122500000000001) 101 +dictGet dict_array (33.859,3.72626) 101 +dictGet dict_array (33.8616,2.24433) 101 +dictGet dict_array (33.8621,3.01035) 101 +dictGet dict_array (33.8623,1.17559) 101 +dictGet dict_array (33.8682,2.706) 5994231 +dictGet dict_array (33.8684,0.189231) 101 +dictGet dict_array (33.872,1.93574) 101 +dictGet dict_array (33.8844,3.80404) 101 +dictGet dict_array (33.8888,0.594884) 101 +dictGet dict_array (33.8946,2.74161) 101 +dictGet dict_array (33.9023,0.6239) 101 +dictGet dict_array (33.9057,0.873222) 101 +dictGet dict_array (33.9157,-1.26607) 101 +dictGet dict_array (33.92,2.06848) 101 +dictGet dict_array (33.9298,-0.00526229) 101 +dictGet dict_array (33.932,3.07063) 101 +dictGet dict_array (33.9322,0.629385) 101 +dictGet dict_array (33.9367,-1.41955) 101 +dictGet dict_array (33.937,1.42532) 101 +dictGet dict_array (33.9375,1.1467100000000001) 101 +dictGet dict_array (33.9434,-1.05739) 101 +dictGet dict_array (33.9477,3.34809) 101 +dictGet dict_array (33.95,2.21715) 101 +dictGet dict_array (33.955799999999996,0.305176) 101 +dictGet dict_array (33.9686,-0.28273) 101 +dictGet dict_array (33.9703,4.1255) 101 +dictGet dict_array (33.9707,3.08199) 101 +dictGet dict_array (33.9754,1.06203) 101 +dictGet dict_array (33.9757,3.72468) 101 +dictGet dict_array (33.9775,-0.0440599) 101 +dictGet dict_array (33.9777,-0.251484) 101 +dictGet dict_array (33.9789,-0.339374) 101 +dictGet dict_array (33.9849,2.54515) 5994231 +dictGet dict_array (33.9885,-0.318557) 101 +dictGet dict_array (33.9977,1.07175) 101 +dictGet dict_array (33.9984,-0.700517) 101 +dictGet dict_array (34.0149,3.53338) 101 +dictGet dict_array (34.0173,3.39155) 101 +dictGet dict_array (34.0317,3.9579) 101 +dictGet dict_array (34.0369,3.83612) 101 +dictGet dict_array (34.043,-0.0887221) 101 +dictGet dict_array (34.0487,1.14252) 101 +dictGet dict_array (34.052,1.74832) 101 +dictGet dict_array (34.0711,-0.898071) 101 +dictGet dict_array (34.0747,1.55057) 101 +dictGet dict_array (34.0803,3.16763) 101 +dictGet dict_array (34.0872,3.75555) 101 +dictGet dict_array (34.0965,1.62038) 101 +dictGet dict_array (34.0977,-0.412691) 101 +dictGet dict_array (34.0986,0.0294206) 101 +dictGet dict_array (34.1072,3.15823) 101 +dictGet dict_array (34.1092,3.09599) 101 +dictGet dict_array (34.1206,1.04637) 5940222 +dictGet dict_array (34.1209,3.13826) 101 +dictGet dict_array (34.1265,3.95881) 101 +dictGet dict_array (34.1286,-0.539319) 101 +dictGet dict_array (34.1358,3.67451) 101 +dictGet dict_array (34.1428,0.136115) 101 +dictGet dict_array (34.157,1.73522) 101 +dictGet dict_array (34.1581,1.48001) 101 +dictGet dict_array (34.1682,3.42373) 101 +dictGet dict_array (34.1683,-1.26511) 101 +dictGet dict_array (34.1684,4.20007) 101 +dictGet dict_array (34.1854,3.32089) 101 +dictGet dict_array (34.2022,0.749536) 101 +dictGet dict_array (34.2044,3.04865) 101 +dictGet dict_array (34.22,-0.500055) 101 +dictGet dict_array (34.2249,0.743775) 101 +dictGet dict_array (34.2254,1.34702) 101 +dictGet dict_array (34.2355,-0.898843) 101 +dictGet dict_array (34.2394,2.0203699999999998) 101 +dictGet dict_array (34.2466,1.83785) 101 +dictGet dict_array (34.247,4.09563) 101 +dictGet dict_array (34.2508,2.61312) 101 +dictGet dict_array (34.2517,1.69642) 101 +dictGet dict_array (34.2564,4.13033) 101 +dictGet dict_array (34.2574,4.18928) 101 +dictGet dict_array (34.2614,-0.478719) 101 +dictGet dict_array (34.2625,2.38088) 101 +dictGet dict_array (34.2666,3.1503) 101 +dictGet dict_array (34.271,4.02223) 101 +dictGet dict_array (34.2727,0.514755) 101 +dictGet dict_array (34.278,1.98929) 101 +dictGet dict_array (34.2798,-0.199208) 101 +dictGet dict_array (34.2804,2.05184) 101 +dictGet dict_array (34.2945,-1.11051) 101 +dictGet dict_array (34.3168,-0.0829721) 101 +dictGet dict_array (34.3345,3.4358) 101 +dictGet dict_array (34.3377,1.13527) 5940222 +dictGet dict_array (34.3383,1.27891) 5940222 +dictGet dict_array (34.3391,1.47945) 5940222 +dictGet dict_array (34.3441,0.627014) 101 +dictGet dict_array (34.347,2.4853) 101 +dictGet dict_array (34.3514,2.16247) 101 +dictGet dict_array (34.3627,2.64533) 101 +dictGet dict_array (34.3682,-0.227501) 101 +dictGet dict_array (34.3756,4.21248) 101 +dictGet dict_array (34.379,3.96604) 101 +dictGet dict_array (34.3827,1.7518) 101 +dictGet dict_array (34.3912,2.8834) 101 +dictGet dict_array (34.3919,0.668829) 101 +dictGet dict_array (34.3949,2.00338) 101 +dictGet dict_array (34.3987,0.557268) 101 +dictGet dict_array (34.4111,0.768558) 101 +dictGet dict_array (34.4119,2.8742) 101 +dictGet dict_array (34.416,3.50841) 101 +dictGet dict_array (34.4212,1.24916) 5940222 +dictGet dict_array (34.4251,0.457029) 101 +dictGet dict_array (34.4274,-0.902559) 101 +dictGet dict_array (34.4325,4.03159) 101 +dictGet dict_array (34.438,1.63994) 101 +dictGet dict_array (34.4403,-0.177594) 101 +dictGet dict_array (34.4421,0.726712) 101 +dictGet dict_array (34.4517,2.98611) 101 +dictGet dict_array (34.4658,-1.312) 101 +dictGet dict_array (34.4732,-0.0681338) 101 +dictGet dict_array (34.4752,2.81646) 101 +dictGet dict_array (34.4914,2.3858) 101 +dictGet dict_array (34.4923,0.855231) 101 +dictGet dict_array (34.5235,1.78468) 101 +dictGet dict_array (34.5305,4.10608) 101 +dictGet dict_array (34.5389,0.621937) 101 +dictGet dict_array (34.5406,3.17145) 101 +dictGet dict_array (34.5434,-0.56306) 101 +dictGet dict_array (34.5449,3.13311) 101 +dictGet dict_array (34.5491,2.31572) 101 +dictGet dict_array (34.5539,2.94028) 101 +dictGet dict_array (34.5546,-0.208825) 101 +dictGet dict_array (34.5549,3.78486) 101 +dictGet dict_array (34.5676,0.307148) 101 +dictGet dict_array (34.5743,1.5217399999999999) 101 +dictGet dict_array (34.5775,3.48046) 101 +dictGet dict_array (34.5815,2.5243700000000002) 101 +dictGet dict_array (34.5841,4.21191) 101 +dictGet dict_array (34.5887,2.65083) 101 +dictGet dict_array (34.5937,3.2143) 101 +dictGet dict_array (34.6013,-1.0612) 101 +dictGet dict_array (34.6089,1.36066) 101 +dictGet dict_array (34.6103,3.40227) 101 +dictGet dict_array (34.6128,1.92276) 101 +dictGet dict_array (34.6175,2.43627) 101 +dictGet dict_array (34.6209,3.43776) 101 +dictGet dict_array (34.6234,2.60237) 101 +dictGet dict_array (34.6275,3.52479) 101 +dictGet dict_array (34.635,0.568558) 101 +dictGet dict_array (34.6373,2.37692) 101 +dictGet dict_array (34.6375,3.52234) 101 +dictGet dict_array (34.6426,2.12397) 101 +dictGet dict_array (34.6513,2.80915) 101 +dictGet dict_array (34.6632,2.30039) 101 +dictGet dict_array (34.6691,1.86582) 101 +dictGet dict_array (34.6739,0.15342) 101 +dictGet dict_array (34.6825,0.0499679) 101 +dictGet dict_array (34.6893,0.454326) 101 +dictGet dict_array (34.6957,-0.358598) 101 +dictGet dict_array (34.6986,0.562679) 101 +dictGet dict_array (34.712,1.12114) 101 +dictGet dict_array (34.7126,-0.0057301) 101 +dictGet dict_array (34.7137,0.0248501) 101 +dictGet dict_array (34.7162,1.15623) 101 +dictGet dict_array (34.7258,3.95142) 101 +dictGet dict_array (34.7347,3.5232099999999997) 101 +dictGet dict_array (34.7363,2.23374) 101 +dictGet dict_array (34.7375,0.397841) 101 +dictGet dict_array (34.7423,3.09198) 101 +dictGet dict_array (34.7452,3.09029) 101 +dictGet dict_array (34.7539,-1.06943) 101 +dictGet dict_array (34.7733,-0.00912717) 101 +dictGet dict_array (34.774,2.71088) 101 +dictGet dict_array (34.7771,1.46009) 101 +dictGet dict_array (34.7782,-1.28308) 101 +dictGet dict_array (34.7924,3.63564) 101 +dictGet dict_array (34.7939,-0.416676) 101 +dictGet dict_array (34.7964,-0.401773) 101 +dictGet dict_array (34.7974,0.0286873) 101 +dictGet dict_array (34.7975,3.05965) 101 +dictGet dict_array (34.8037,3.07263) 101 +dictGet dict_array (34.8254,-0.390284) 101 +dictGet dict_array (34.828,1.91869) 101 +dictGet dict_array (34.8289,3.71058) 101 +dictGet dict_array (34.8403,2.14606) 101 +dictGet dict_array (34.8437,2.20617) 101 +dictGet dict_array (34.8469,2.38435) 101 +dictGet dict_array (34.86,1.45705) 101 +dictGet dict_array (34.8612,0.914248) 101 +dictGet dict_array (34.8663,3.4215400000000002) 101 +dictGet dict_array (34.8724,-0.375144) 101 +dictGet dict_array (34.8795,3.29317) 101 +dictGet dict_array (34.8823,1.21988) 101 +dictGet dict_array (34.8834,1.07657) 101 +dictGet dict_array (34.8837,0.157648) 101 +dictGet dict_array (34.8871,-0.9755) 101 +dictGet dict_array (34.8871,1.8943699999999999) 101 +dictGet dict_array (34.889,3.36756) 101 +dictGet dict_array (34.8907,1.24874) 101 +dictGet dict_array (34.8965,3.13508) 101 +dictGet dict_array (34.9042,2.62092) 101 +dictGet dict_array (34.9055,-0.0448967) 101 +dictGet dict_array (34.9122,0.110576) 101 +dictGet dict_array (34.9228,3.60183) 101 +dictGet dict_array (34.9237,1.21715) 101 +dictGet dict_array (34.9296,1.70459) 101 +dictGet dict_array (34.941,-1.14663) 101 +dictGet dict_array (34.9448,1.18923) 101 +dictGet dict_array (34.9462,3.81678) 101 +dictGet dict_array (34.9466,0.593463) 101 +dictGet dict_array (34.9485,0.150307) 101 +dictGet dict_array (34.9542,0.487238) 101 +dictGet dict_array (34.9559,2.03473) 101 +dictGet dict_array (34.9671,-0.960225) 101 +dictGet dict_array (34.9711,2.63444) 101 +dictGet dict_array (34.9892,0.354775) 101 +dictGet dict_array (34.9907,1.40724) 101 +dictGet dict_array (34.9916,-0.00173097) 101 +dictGet dict_array (34.9919,2.06167) 101 diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh index c2a35a3ef63..fff786d6c06 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-debug, no-parallel +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -12,20 +12,18 @@ declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${CURDIR}" $CLICKHOUSE_CLIENT -n --query=" -DROP DATABASE IF EXISTS test_01037; -CREATE DATABASE test_01037; -DROP TABLE IF EXISTS test_01037.points; -CREATE TABLE test_01037.points (x Float64, y Float64) ENGINE = Memory; +DROP TABLE IF EXISTS points; +CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.points FORMAT TSV" --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" rm "${CURDIR}"/01037_point_data $CLICKHOUSE_CLIENT -n --query=" -DROP TABLE IF EXISTS test_01037.polygons_array; +DROP TABLE IF EXISTS polygons_array; -CREATE TABLE test_01037.polygons_array +CREATE TABLE polygons_array ( key Array(Array(Array(Array(Float64)))), name String, @@ -34,7 +32,7 @@ CREATE TABLE test_01037.polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" rm "${CURDIR}"/01037_polygon_data @@ -43,27 +41,22 @@ do outputFile="${TMP_DIR}/results${type}.out" $CLICKHOUSE_CLIENT -n --query=" - DROP DICTIONARY IF EXISTS test_01037.dict_array; + DROP DICTIONARY IF EXISTS dict_array; - CREATE DICTIONARY test_01037.dict_array + CREATE DICTIONARY dict_array ( key Array(Array(Array(Array(Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; + select 'dictGet', 'dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; " > "$outputFile" diff -q "${CURDIR}/01037_polygon_dicts_correctness_all.ans" "$outputFile" done - -$CLICKHOUSE_CLIENT -n --query=" -DROP TABLE test_01037.points; -DROP DATABASE test_01037; -" diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans index 45fa7637421..297c8416096 100644 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans @@ -1,1000 +1,1000 @@ -dictGet test_01037.dict_array (29.5699,2.50068) 101 -dictGet test_01037.dict_array (29.5796,1.55456) 101 -dictGet test_01037.dict_array (29.5796,2.36864) 101 -dictGet test_01037.dict_array (29.5844,1.59626) 101 -dictGet test_01037.dict_array (29.5886,4.03321) 101 -dictGet test_01037.dict_array (29.5914,3.02628) 101 -dictGet test_01037.dict_array (29.5926,-0.0965169) 101 -dictGet test_01037.dict_array (29.5968,2.37773) 101 -dictGet test_01037.dict_array (29.5984,0.755853) 101 -dictGet test_01037.dict_array (29.6066,3.47173) 101 -dictGet test_01037.dict_array (29.6085,-1.26007) 6489978 -dictGet test_01037.dict_array (29.6131,0.246565) 101 -dictGet test_01037.dict_array (29.6157,-0.266687) 101 -dictGet test_01037.dict_array (29.6164,2.94674) 101 -dictGet test_01037.dict_array (29.6195,-0.591941) 101 -dictGet test_01037.dict_array (29.6231,1.54818) 101 -dictGet test_01037.dict_array (29.6379,0.764114) 101 -dictGet test_01037.dict_array (29.6462,-0.772059) 934530 -dictGet test_01037.dict_array (29.6579,-1.07336) 6489978 -dictGet test_01037.dict_array (29.6618,-0.271842) 101 -dictGet test_01037.dict_array (29.6629,-0.303602) 101 -dictGet test_01037.dict_array (29.6659,-0.782823) 934530 -dictGet test_01037.dict_array (29.6736,-0.113832) 101 -dictGet test_01037.dict_array (29.6759,3.02905) 101 -dictGet test_01037.dict_array (29.6778,3.71898) 101 -dictGet test_01037.dict_array (29.6796,1.10433) 101 -dictGet test_01037.dict_array (29.6809,2.13677) 101 -dictGet test_01037.dict_array (29.6935,4.11894) 101 -dictGet test_01037.dict_array (29.6991,-1.4458199999999999) 101 -dictGet test_01037.dict_array (29.6997,3.17297) 101 -dictGet test_01037.dict_array (29.7043,3.6145899999999997) 101 -dictGet test_01037.dict_array (29.7065,3.24885) 101 -dictGet test_01037.dict_array (29.7126,0.28108) 101 -dictGet test_01037.dict_array (29.7192,0.174273) 101 -dictGet test_01037.dict_array (29.7217,-0.523481) 3501900 -dictGet test_01037.dict_array (29.7271,1.67967) 101 -dictGet test_01037.dict_array (29.7311,4.12444) 101 -dictGet test_01037.dict_array (29.7347,1.88378) 101 -dictGet test_01037.dict_array (29.7358,0.67944) 101 -dictGet test_01037.dict_array (29.7366,-0.2973) 101 -dictGet test_01037.dict_array (29.7446,0.646536) 101 -dictGet test_01037.dict_array (29.7453,-0.567963) 3501900 -dictGet test_01037.dict_array (29.764,4.04217) 101 -dictGet test_01037.dict_array (29.7655,1.51372) 101 -dictGet test_01037.dict_array (29.7744,1.12435) 101 -dictGet test_01037.dict_array (29.7774,-0.0681196) 3501895 -dictGet test_01037.dict_array (29.7784,1.54864) 101 -dictGet test_01037.dict_array (29.7785,2.24139) 101 -dictGet test_01037.dict_array (29.7922,0.220808) 101 -dictGet test_01037.dict_array (29.7936,2.37709) 101 -dictGet test_01037.dict_array (29.8008,0.948536) 101 -dictGet test_01037.dict_array (29.8115,0.201227) 101 -dictGet test_01037.dict_array (29.814,0.149601) 3501895 -dictGet test_01037.dict_array (29.8193,-1.35858) 101 -dictGet test_01037.dict_array (29.8201,0.965518) 101 -dictGet test_01037.dict_array (29.8265,-0.727286) 3501900 -dictGet test_01037.dict_array (29.8277,-0.531746) 3501900 -dictGet test_01037.dict_array (29.8289,3.63009) 101 -dictGet test_01037.dict_array (29.8548,0.838047) 101 -dictGet test_01037.dict_array (29.8641,-0.845265) 3501900 -dictGet test_01037.dict_array (29.8649,0.0562212) 3501895 -dictGet test_01037.dict_array (29.8701,-1.02045) 934530 -dictGet test_01037.dict_array (29.8733,2.76654) 101 -dictGet test_01037.dict_array (29.876,0.555475) 101 -dictGet test_01037.dict_array (29.8794,-0.800108) 3501900 -dictGet test_01037.dict_array (29.8813,2.7426399999999997) 101 -dictGet test_01037.dict_array (29.897100000000002,2.66193) 101 -dictGet test_01037.dict_array (29.908,4.01339) 101 -dictGet test_01037.dict_array (29.9165,-1.08246) 3501894 -dictGet test_01037.dict_array (29.9201,-0.420861) 3498054 -dictGet test_01037.dict_array (29.9217,3.03778) 101 -dictGet test_01037.dict_array (29.9355,0.773833) 101 -dictGet test_01037.dict_array (29.947,3.76517) 101 -dictGet test_01037.dict_array (29.9518,-0.60557) 3498056 -dictGet test_01037.dict_array (29.9564,-0.600163) 3498056 -dictGet test_01037.dict_array (29.959600000000002,4.16591) 101 -dictGet test_01037.dict_array (29.9615,-1.33708) 3501894 -dictGet test_01037.dict_array (29.9699,-0.392375) 3498054 -dictGet test_01037.dict_array (29.9776,1.04552) 101 -dictGet test_01037.dict_array (29.9784,4.02756) 101 -dictGet test_01037.dict_array (29.9819,4.00597) 101 -dictGet test_01037.dict_array (29.9826,1.2816100000000001) 101 -dictGet test_01037.dict_array (30.0026,2.76257) 101 -dictGet test_01037.dict_array (30.0126,3.68255) 101 -dictGet test_01037.dict_array (30.0131,0.796576) 3501892 -dictGet test_01037.dict_array (30.018,1.16523) 101 -dictGet test_01037.dict_array (30.0261,-0.210653) 3501896 -dictGet test_01037.dict_array (30.0472,-1.11007) 3501894 -dictGet test_01037.dict_array (30.0542,-0.479585) 3498054 -dictGet test_01037.dict_array (30.0613,1.6278000000000001) 101 -dictGet test_01037.dict_array (30.0617,-0.0551152) 3501895 -dictGet test_01037.dict_array (30.0637,2.62066) 101 -dictGet test_01037.dict_array (30.0721,1.6424400000000001) 101 -dictGet test_01037.dict_array (30.0769,-0.402636) 3498054 -dictGet test_01037.dict_array (30.0791,-0.277435) 3501896 -dictGet test_01037.dict_array (30.0931,0.0327512) 3501895 -dictGet test_01037.dict_array (30.1059,3.52623) 101 -dictGet test_01037.dict_array (30.1103,0.865466) 3501892 -dictGet test_01037.dict_array (30.1115,2.95243) 101 -dictGet test_01037.dict_array (30.1144,1.71029) 101 -dictGet test_01037.dict_array (30.1311,-0.864751) 3501899 -dictGet test_01037.dict_array (30.1336,-0.851386) 3501899 -dictGet test_01037.dict_array (30.1393,3.89901) 101 -dictGet test_01037.dict_array (30.1456,-0.531898) 3498054 -dictGet test_01037.dict_array (30.1492,2.07833) 101 -dictGet test_01037.dict_array (30.1575,2.43856) 101 -dictGet test_01037.dict_array (30.1682,1.19771) 101 -dictGet test_01037.dict_array (30.1716,3.9853300000000003) 101 -dictGet test_01037.dict_array (30.1849,2.78374) 101 -dictGet test_01037.dict_array (30.1866,0.65658) 3498021 -dictGet test_01037.dict_array (30.1885,1.56943) 101 -dictGet test_01037.dict_array (30.1959,-1.38202) 101 -dictGet test_01037.dict_array (30.1999,1.58413) 101 -dictGet test_01037.dict_array (30.2024,0.713081) 3498021 -dictGet test_01037.dict_array (30.2054,0.620143) 3498021 -dictGet test_01037.dict_array (30.2091,1.51641) 101 -dictGet test_01037.dict_array (30.2124,-0.331782) 3498031 -dictGet test_01037.dict_array (30.226,3.03527) 101 -dictGet test_01037.dict_array (30.2261,3.18486) 101 -dictGet test_01037.dict_array (30.2288,2.48407) 101 -dictGet test_01037.dict_array (30.2345,3.7462400000000002) 101 -dictGet test_01037.dict_array (30.2375,0.62046) 3498021 -dictGet test_01037.dict_array (30.2425,-0.472914) 3498054 -dictGet test_01037.dict_array (30.247,3.95863) 101 -dictGet test_01037.dict_array (30.2494,-0.305093) 3498031 -dictGet test_01037.dict_array (30.2499,2.54337) 101 -dictGet test_01037.dict_array (30.2606,2.16644) 101 -dictGet test_01037.dict_array (30.2672,3.94847) 101 -dictGet test_01037.dict_array (30.2709,-0.136264) 6088794 -dictGet test_01037.dict_array (30.2764,1.18654) 101 -dictGet test_01037.dict_array (30.2765,1.20383) 101 -dictGet test_01037.dict_array (30.2839,1.05762) 3498024 -dictGet test_01037.dict_array (30.286,0.469327) 3498021 -dictGet test_01037.dict_array (30.2927,3.1693) 101 -dictGet test_01037.dict_array (30.2935,3.49854) 101 -dictGet test_01037.dict_array (30.307,0.312338) 3498021 -dictGet test_01037.dict_array (30.3085,1.07791) 3498024 -dictGet test_01037.dict_array (30.3139,2.77248) 101 -dictGet test_01037.dict_array (30.314,0.822823) 3498024 -dictGet test_01037.dict_array (30.3227,-0.587351) 3498055 -dictGet test_01037.dict_array (30.332,1.00174) 3498024 -dictGet test_01037.dict_array (30.3388,0.844148) 3498024 -dictGet test_01037.dict_array (30.3485,0.561902) 3498021 -dictGet test_01037.dict_array (30.3497,0.180362) 6489998 -dictGet test_01037.dict_array (30.361,4.13016) 101 -dictGet test_01037.dict_array (30.3623,-0.0484027) 6489998 -dictGet test_01037.dict_array (30.3638,3.9845800000000002) 101 -dictGet test_01037.dict_array (30.3853,3.16051) 101 -dictGet test_01037.dict_array (30.3974,2.6617800000000003) 101 -dictGet test_01037.dict_array (30.4002,-1.15886) 101 -dictGet test_01037.dict_array (30.4008,-0.387015) 3498031 -dictGet test_01037.dict_array (30.4018,1.86493) 101 -dictGet test_01037.dict_array (30.4239,1.16818) 3498024 -dictGet test_01037.dict_array (30.4363,3.63938) 101 -dictGet test_01037.dict_array (30.4377,-0.81315) 3498063 -dictGet test_01037.dict_array (30.4391,3.54703) 101 -dictGet test_01037.dict_array (30.4424,-1.39435) 101 -dictGet test_01037.dict_array (30.4441,2.8463000000000003) 101 -dictGet test_01037.dict_array (30.4517,3.28117) 101 -dictGet test_01037.dict_array (30.4658,2.6928) 101 -dictGet test_01037.dict_array (30.4734,2.66161) 101 -dictGet test_01037.dict_array (30.4799,-1.07578) 101 -dictGet test_01037.dict_array (30.4837,-1.02486) 3501899 -dictGet test_01037.dict_array (30.485,1.06326) 3498024 -dictGet test_01037.dict_array (30.495,1.12306) 101 -dictGet test_01037.dict_array (30.501,2.27264) 101 -dictGet test_01037.dict_array (30.5027,1.99382) 101 -dictGet test_01037.dict_array (30.5194,-1.03943) 3501893 -dictGet test_01037.dict_array (30.5239,1.04328) 101 -dictGet test_01037.dict_array (30.528,3.82041) 101 -dictGet test_01037.dict_array (30.5299,-0.715248) 3498063 -dictGet test_01037.dict_array (30.5331,1.19603) 101 -dictGet test_01037.dict_array (30.535800000000002,2.71485) 101 -dictGet test_01037.dict_array (30.5405,0.804694) 3498023 -dictGet test_01037.dict_array (30.542,1.23739) 101 -dictGet test_01037.dict_array (30.5432,4.04189) 101 -dictGet test_01037.dict_array (30.5457,-0.956121) 3501893 -dictGet test_01037.dict_array (30.5506,3.07443) 101 -dictGet test_01037.dict_array (30.5539,3.87084) 101 -dictGet test_01037.dict_array (30.5578,3.78837) 101 -dictGet test_01037.dict_array (30.5588,0.966135) 3498022 -dictGet test_01037.dict_array (30.5637,2.5605) 101 -dictGet test_01037.dict_array (30.5647,-1.27328) 101 -dictGet test_01037.dict_array (30.5656,-0.0581332) 6088794 -dictGet test_01037.dict_array (30.5715,0.65755) 3498023 -dictGet test_01037.dict_array (30.5727,3.01604) 101 -dictGet test_01037.dict_array (30.5729,-0.976857) 3501893 -dictGet test_01037.dict_array (30.5751,0.60204) 3498023 -dictGet test_01037.dict_array (30.5854,3.02473) 101 -dictGet test_01037.dict_array (30.5866,0.174099) 6489998 -dictGet test_01037.dict_array (30.5947,0.875193) 3498023 -dictGet test_01037.dict_array (30.5992,-0.403901) 3498063 -dictGet test_01037.dict_array (30.6002,4.18891) 101 -dictGet test_01037.dict_array (30.6025,0.217712) 6489998 -dictGet test_01037.dict_array (30.6054,0.927203) 3498022 -dictGet test_01037.dict_array (30.6075,3.79359) 101 -dictGet test_01037.dict_array (30.6159,3.82773) 101 -dictGet test_01037.dict_array (30.627,3.84039) 101 -dictGet test_01037.dict_array (30.6308,0.77517) 3498023 -dictGet test_01037.dict_array (30.6338,0.179565) 6489998 -dictGet test_01037.dict_array (30.6461,1.3293599999999999) 101 -dictGet test_01037.dict_array (30.6674,-0.424547) 3498063 -dictGet test_01037.dict_array (30.669,1.76539) 101 -dictGet test_01037.dict_array (30.6788,4.01239) 101 -dictGet test_01037.dict_array (30.6864,3.59158) 101 -dictGet test_01037.dict_array (30.7049,-0.875413) 3501893 -dictGet test_01037.dict_array (30.705,1.3307) 101 -dictGet test_01037.dict_array (30.7063,-0.473192) 3498063 -dictGet test_01037.dict_array (30.7075,-1.1958199999999999) 101 -dictGet test_01037.dict_array (30.7101,-0.367562) 3498012 -dictGet test_01037.dict_array (30.7203,2.98725) 101 -dictGet test_01037.dict_array (30.7213,2.2745699999999998) 101 -dictGet test_01037.dict_array (30.7446,-0.334144) 3498012 -dictGet test_01037.dict_array (30.7468,3.82967) 101 -dictGet test_01037.dict_array (30.747,-0.384779) 3498012 -dictGet test_01037.dict_array (30.7681,0.904198) 3498022 -dictGet test_01037.dict_array (30.7757,1.78743) 101 -dictGet test_01037.dict_array (30.8021,-0.479212) 3498012 -dictGet test_01037.dict_array (30.8079,-1.40869) 101 -dictGet test_01037.dict_array (30.8206,-0.0608489) 3498012 -dictGet test_01037.dict_array (30.8218,0.43909) 3498023 -dictGet test_01037.dict_array (30.8239,0.10014) 3498012 -dictGet test_01037.dict_array (30.8282,4.15409) 101 -dictGet test_01037.dict_array (30.8288,-0.709528) 3501893 -dictGet test_01037.dict_array (30.8326,0.156011) 3498012 -dictGet test_01037.dict_array (30.8328,-1.03704) 101 -dictGet test_01037.dict_array (30.839,2.15528) 101 -dictGet test_01037.dict_array (30.8452,0.219377) 3498013 -dictGet test_01037.dict_array (30.8463,0.0515355) 3498012 -dictGet test_01037.dict_array (30.8526,2.06614) 101 -dictGet test_01037.dict_array (30.8566,0.517876) 3498023 -dictGet test_01037.dict_array (30.8588,-1.31738) 101 -dictGet test_01037.dict_array (30.8681,0.44207) 3498013 -dictGet test_01037.dict_array (30.8914,1.0072) 3498022 -dictGet test_01037.dict_array (30.897,0.483425) 3498013 -dictGet test_01037.dict_array (30.905,2.8731999999999998) 3501793 -dictGet test_01037.dict_array (30.9051,2.21956) 101 -dictGet test_01037.dict_array (30.9115,4.00663) 101 -dictGet test_01037.dict_array (30.9167,-0.834462) 3501893 -dictGet test_01037.dict_array (30.9252,-1.3289900000000001) 101 -dictGet test_01037.dict_array (30.9314,1.85384) 101 -dictGet test_01037.dict_array (30.9392,2.53236) 3501827 -dictGet test_01037.dict_array (30.9569,2.82038) 3501793 -dictGet test_01037.dict_array (30.9598,-0.641011) 3498012 -dictGet test_01037.dict_array (30.9601,-0.254928) 3498012 -dictGet test_01037.dict_array (30.9623,-1.3886) 101 -dictGet test_01037.dict_array (30.9707,0.888854) 3498022 -dictGet test_01037.dict_array (30.9766,2.81957) 3501793 -dictGet test_01037.dict_array (30.9775,2.69273) 3501793 -dictGet test_01037.dict_array (30.9821,0.587715) 3498013 -dictGet test_01037.dict_array (30.9887,4.0233) 101 -dictGet test_01037.dict_array (30.9914,0.259542) 3498013 -dictGet test_01037.dict_array (30.9986,-1.36832) 101 -dictGet test_01037.dict_array (31.008,0.628999) 3498013 -dictGet test_01037.dict_array (31.0168,-1.17462) 101 -dictGet test_01037.dict_array (31.0237,3.52547) 3501821 -dictGet test_01037.dict_array (31.0306,3.78522) 101 -dictGet test_01037.dict_array (31.0308,-0.72453) 3501893 -dictGet test_01037.dict_array (31.0463,2.41997) 3501825 -dictGet test_01037.dict_array (31.047,0.624184) 3498013 -dictGet test_01037.dict_array (31.0569,0.0706393) 3498015 -dictGet test_01037.dict_array (31.0583,1.3244099999999999) 3501926 -dictGet test_01037.dict_array (31.063,3.23861) 3501793 -dictGet test_01037.dict_array (31.068,0.695575) 3498022 -dictGet test_01037.dict_array (31.0687,1.85675) 101 -dictGet test_01037.dict_array (31.0692,0.254793) 3498014 -dictGet test_01037.dict_array (31.0766,0.828128) 3498022 -dictGet test_01037.dict_array (31.0833,0.0612782) 3498015 -dictGet test_01037.dict_array (31.0833,2.59748) 3501793 -dictGet test_01037.dict_array (31.0861,-1.3778299999999999) 101 -dictGet test_01037.dict_array (31.0874,3.07258) 3501793 -dictGet test_01037.dict_array (31.0882,1.4882) 3501926 -dictGet test_01037.dict_array (31.0924,3.42242) 3501821 -dictGet test_01037.dict_array (31.0927,2.67448) 3501793 -dictGet test_01037.dict_array (31.0936,1.12292) 3498022 -dictGet test_01037.dict_array (31.0952,-0.336928) 3498012 -dictGet test_01037.dict_array (31.0978,3.48482) 3501826 -dictGet test_01037.dict_array (31.1107,3.7513199999999998) 3501826 -dictGet test_01037.dict_array (31.1156,1.19171) 3501926 -dictGet test_01037.dict_array (31.1176,0.223509) 3498015 -dictGet test_01037.dict_array (31.1249,0.946838) 3498022 -dictGet test_01037.dict_array (31.1267,1.48983) 3501926 -dictGet test_01037.dict_array (31.138,-0.289981) 3501898 -dictGet test_01037.dict_array (31.1382,3.02904) 3501793 -dictGet test_01037.dict_array (31.1475,2.6178) 3501793 -dictGet test_01037.dict_array (31.1491,1.37873) 3501926 -dictGet test_01037.dict_array (31.1525,3.72105) 3501826 -dictGet test_01037.dict_array (31.1526,-1.4129800000000001) 101 -dictGet test_01037.dict_array (31.1526,-0.186457) 3501898 -dictGet test_01037.dict_array (31.1539,2.78789) 3501793 -dictGet test_01037.dict_array (31.1548,-1.08552) 101 -dictGet test_01037.dict_array (31.1567,-0.0768925) 3501898 -dictGet test_01037.dict_array (31.1613,1.49617) 3501926 -dictGet test_01037.dict_array (31.1653,1.03777) 3498022 -dictGet test_01037.dict_array (31.1662,3.4214700000000002) 3501826 -dictGet test_01037.dict_array (31.1672,-0.0813169) 3501898 -dictGet test_01037.dict_array (31.177,0.440843) 3498014 -dictGet test_01037.dict_array (31.1788,-0.737151) 3501893 -dictGet test_01037.dict_array (31.1856,-0.144396) 3501898 -dictGet test_01037.dict_array (31.1959,3.66813) 3501826 -dictGet test_01037.dict_array (31.1996,-0.353983) 3501898 -dictGet test_01037.dict_array (31.2019,2.86802) 3501793 -dictGet test_01037.dict_array (31.2087,2.31245) 3501825 -dictGet test_01037.dict_array (31.2125,3.2713200000000002) 3501793 -dictGet test_01037.dict_array (31.2137,-0.108129) 3501898 -dictGet test_01037.dict_array (31.216,3.9156) 101 -dictGet test_01037.dict_array (31.2201,-0.202141) 3501898 -dictGet test_01037.dict_array (31.2285,2.09058) 101 -dictGet test_01037.dict_array (31.2502,4.01526) 101 -dictGet test_01037.dict_array (31.2585,3.11524) 3501793 -dictGet test_01037.dict_array (31.2645,-0.620418) 3501890 -dictGet test_01037.dict_array (31.2684,2.74277) 3501793 -dictGet test_01037.dict_array (31.2821,-1.12772) 101 -dictGet test_01037.dict_array (31.2821,2.46769) 3501825 -dictGet test_01037.dict_array (31.2887,3.91396) 101 -dictGet test_01037.dict_array (31.295,1.49942) 3501926 -dictGet test_01037.dict_array (31.2997,3.46122) 3501826 -dictGet test_01037.dict_array (31.3017,3.3263) 3501826 -dictGet test_01037.dict_array (31.3022,3.16754) 3501793 -dictGet test_01037.dict_array (31.3048,0.364962) 3498014 -dictGet test_01037.dict_array (31.305,3.1967) 3501793 -dictGet test_01037.dict_array (31.3061,1.84303) 101 -dictGet test_01037.dict_array (31.3082,-0.173851) 3501898 -dictGet test_01037.dict_array (31.3315,3.90932) 101 -dictGet test_01037.dict_array (31.3351,2.80164) 3501793 -dictGet test_01037.dict_array (31.3388,0.168765) 3498015 -dictGet test_01037.dict_array (31.339,0.25535) 3498094 -dictGet test_01037.dict_array (31.3423,1.7036799999999999) 3501926 -dictGet test_01037.dict_array (31.349,0.386456) 3498014 -dictGet test_01037.dict_array (31.3558,-1.04336) 101 -dictGet test_01037.dict_array (31.3564,0.478876) 3498014 -dictGet test_01037.dict_array (31.3607,-0.0860507) 3498015 -dictGet test_01037.dict_array (31.3831,3.84469) 101 -dictGet test_01037.dict_array (31.3886,-0.731137) 3501890 -dictGet test_01037.dict_array (31.4043,-0.348907) 5457271 -dictGet test_01037.dict_array (31.4081,1.47391) 3501926 -dictGet test_01037.dict_array (31.4176,-0.583645) 5457271 -dictGet test_01037.dict_array (31.4177,1.36972) 3501926 -dictGet test_01037.dict_array (31.4182,0.958303) 3498022 -dictGet test_01037.dict_array (31.4199,3.1738) 3501793 -dictGet test_01037.dict_array (31.4221,2.74876) 3501825 -dictGet test_01037.dict_array (31.4301,-0.122643) 3498015 -dictGet test_01037.dict_array (31.4344,1.00661) 3498022 -dictGet test_01037.dict_array (31.4375,4.20304) 101 -dictGet test_01037.dict_array (31.4377,0.289608) 3498094 -dictGet test_01037.dict_array (31.4379,0.54744) 3498014 -dictGet test_01037.dict_array (31.4459,3.94945) 101 -dictGet test_01037.dict_array (31.4559,-0.345063) 5457271 -dictGet test_01037.dict_array (31.464,0.726129) 3498014 -dictGet test_01037.dict_array (31.4662,-0.299019) 3498015 -dictGet test_01037.dict_array (31.4671,1.9605299999999999) 3501794 -dictGet test_01037.dict_array (31.4673,-0.403676) 5457271 -dictGet test_01037.dict_array (31.4712,-0.237941) 3498015 -dictGet test_01037.dict_array (31.4816,0.120264) 3498015 -dictGet test_01037.dict_array (31.4875,0.323483) 3498014 -dictGet test_01037.dict_array (31.490099999999998,-0.338163) 5457271 -dictGet test_01037.dict_array (31.4932,0.517674) 3498014 -dictGet test_01037.dict_array (31.5112,1.9689299999999998) 3501794 -dictGet test_01037.dict_array (31.5122,2.92785) 3501791 -dictGet test_01037.dict_array (31.5151,0.166429) 3498094 -dictGet test_01037.dict_array (31.5174,2.94802) 3501791 -dictGet test_01037.dict_array (31.5182,4.18776) 101 -dictGet test_01037.dict_array (31.5238,1.18793) 3498003 -dictGet test_01037.dict_array (31.5271,3.07446) 3501791 -dictGet test_01037.dict_array (31.5393,1.58061) 3501794 -dictGet test_01037.dict_array (31.5421,3.13711) 3501791 -dictGet test_01037.dict_array (31.5479,2.39897) 3497970 -dictGet test_01037.dict_array (31.5519,0.99285) 3498003 -dictGet test_01037.dict_array (31.5685,3.47987) 3501824 -dictGet test_01037.dict_array (31.5959,0.437382) 3498014 -dictGet test_01037.dict_array (31.6003,0.194376) 3498094 -dictGet test_01037.dict_array (31.6026,2.15457) 3501794 -dictGet test_01037.dict_array (31.606,2.45365) 3497970 -dictGet test_01037.dict_array (31.6062,-0.453441) 3501890 -dictGet test_01037.dict_array (31.6107,1.35247) 3497974 -dictGet test_01037.dict_array (31.6155,3.85588) 101 -dictGet test_01037.dict_array (31.6222,2.03326) 3501794 -dictGet test_01037.dict_array (31.6231,-0.123059) 3498083 -dictGet test_01037.dict_array (31.6244,1.6885599999999998) 3497974 -dictGet test_01037.dict_array (31.6459,0.669716) 3498014 -dictGet test_01037.dict_array (31.6563,-0.0644741) 3498083 -dictGet test_01037.dict_array (31.6618,-0.551121) 3501890 -dictGet test_01037.dict_array (31.6725,-0.38922) 3498085 -dictGet test_01037.dict_array (31.6727,4.10336) 101 -dictGet test_01037.dict_array (31.6739,4.1391) 101 -dictGet test_01037.dict_array (31.6897,2.8694699999999997) 3501792 -dictGet test_01037.dict_array (31.6902,3.98792) 101 -dictGet test_01037.dict_array (31.6945,2.46687) 3497970 -dictGet test_01037.dict_array (31.6987,-1.3796) 101 -dictGet test_01037.dict_array (31.7012,2.34845) 3497970 -dictGet test_01037.dict_array (31.7036,0.0228348) 3501888 -dictGet test_01037.dict_array (31.7046,3.68111) 3501824 -dictGet test_01037.dict_array (31.7055,2.92556) 3501792 -dictGet test_01037.dict_array (31.7102,1.04532) 3498003 -dictGet test_01037.dict_array (31.7149,-0.443302) 3498085 -dictGet test_01037.dict_array (31.7195,2.99311) 3501791 -dictGet test_01037.dict_array (31.7274,0.166719) 3498094 -dictGet test_01037.dict_array (31.7565,-0.565382) 3498085 -dictGet test_01037.dict_array (31.7615,0.771626) 3498014 -dictGet test_01037.dict_array (31.7739,1.8970099999999999) 3497974 -dictGet test_01037.dict_array (31.7848,1.2623199999999999) 3498003 -dictGet test_01037.dict_array (31.7912,-0.788599) 101 -dictGet test_01037.dict_array (31.8011,2.65853) 3497970 -dictGet test_01037.dict_array (31.8032,-0.0590108) 3501888 -dictGet test_01037.dict_array (31.8038,1.9618799999999998) 3497974 -dictGet test_01037.dict_array (31.8098,-1.46851) 101 -dictGet test_01037.dict_array (31.8131,3.41982) 3501791 -dictGet test_01037.dict_array (31.8169,3.31059) 3501791 -dictGet test_01037.dict_array (31.8202,-0.193692) 3501888 -dictGet test_01037.dict_array (31.8306,1.57586) 3497974 -dictGet test_01037.dict_array (31.8382,-0.787948) 101 -dictGet test_01037.dict_array (31.8433,2.49692) 3497970 -dictGet test_01037.dict_array (31.8436,2.41851) 3497970 -dictGet test_01037.dict_array (31.8563,-1.10787) 101 -dictGet test_01037.dict_array (31.8683,0.996504) 3498002 -dictGet test_01037.dict_array (31.8693,-0.828142) 101 -dictGet test_01037.dict_array (31.8723,1.08929) 3498003 -dictGet test_01037.dict_array (31.8737,0.881127) 3498002 -dictGet test_01037.dict_array (31.8881,-0.58441) 101 -dictGet test_01037.dict_array (31.9011,0.121349) 3498094 -dictGet test_01037.dict_array (31.9066,2.13045) 3497965 -dictGet test_01037.dict_array (31.9142,1.03368) 3498002 -dictGet test_01037.dict_array (31.9155,3.38363) 3501791 -dictGet test_01037.dict_array (31.9168,1.3166) 3498004 -dictGet test_01037.dict_array (31.9185,-1.11879) 101 -dictGet test_01037.dict_array (31.9186,-0.647948) 101 -dictGet test_01037.dict_array (31.9311,3.96928) 101 -dictGet test_01037.dict_array (31.9335,1.47048) 3497974 -dictGet test_01037.dict_array (31.9443,-1.36175) 101 -dictGet test_01037.dict_array (31.9481,2.34231) 3497970 -dictGet test_01037.dict_array (31.9526,1.36565) 3498004 -dictGet test_01037.dict_array (31.9629,2.5208399999999997) 3497970 -dictGet test_01037.dict_array (31.9765,0.975783) 3498002 -dictGet test_01037.dict_array (31.9923,3.31773) 3501791 -dictGet test_01037.dict_array (31.9994,0.972816) 3498002 -dictGet test_01037.dict_array (32.001,3.47425) 3501791 -dictGet test_01037.dict_array (32.0127,2.13874) 3497965 -dictGet test_01037.dict_array (32.0244,3.2092) 3501792 -dictGet test_01037.dict_array (32.029,1.18039) 3498004 -dictGet test_01037.dict_array (32.0315,0.566073) 3498095 -dictGet test_01037.dict_array (32.0354,1.0766499999999999) 3498004 -dictGet test_01037.dict_array (32.0399,-1.11576) 101 -dictGet test_01037.dict_array (32.053,2.16849) 3497965 -dictGet test_01037.dict_array (32.0542,0.042328) 3498096 -dictGet test_01037.dict_array (32.0576,2.47001) 3497970 -dictGet test_01037.dict_array (32.061,3.7498899999999997) 101 -dictGet test_01037.dict_array (32.0623,1.25134) 3498004 -dictGet test_01037.dict_array (32.0626,1.9611399999999999) 3497965 -dictGet test_01037.dict_array (32.0666,-0.0904247) 3498096 -dictGet test_01037.dict_array (32.0681,2.28442) 3497970 -dictGet test_01037.dict_array (32.0692,1.50869) 3497981 -dictGet test_01037.dict_array (32.0724,4.03314) 101 -dictGet test_01037.dict_array (32.0729,-0.064324) 101 -dictGet test_01037.dict_array (32.079,0.293758) 3498094 -dictGet test_01037.dict_array (32.0847,-1.19814) 101 -dictGet test_01037.dict_array (32.0974,-0.91927) 101 -dictGet test_01037.dict_array (32.0979,-0.736979) 101 -dictGet test_01037.dict_array (32.106,-1.33063) 101 -dictGet test_01037.dict_array (32.1189,0.246715) 3498094 -dictGet test_01037.dict_array (32.1207,4.00883) 101 -dictGet test_01037.dict_array (32.1396,1.12402) 3498004 -dictGet test_01037.dict_array (32.1413,1.5668) 3497981 -dictGet test_01037.dict_array (32.143,1.35559) 3498004 -dictGet test_01037.dict_array (32.1538,1.32881) 3498004 -dictGet test_01037.dict_array (32.1549,4.06552) 101 -dictGet test_01037.dict_array (32.1555,-0.79275) 101 -dictGet test_01037.dict_array (32.163,1.17733) 3498004 -dictGet test_01037.dict_array (32.1634,2.94273) 3501792 -dictGet test_01037.dict_array (32.1644,1.85666) 3497965 -dictGet test_01037.dict_array (32.1745,0.435458) 3498095 -dictGet test_01037.dict_array (32.1765,1.65149) 3497981 -dictGet test_01037.dict_array (32.1893,2.08924) 3497965 -dictGet test_01037.dict_array (32.2024,0.222191) 3498093 -dictGet test_01037.dict_array (32.2107,1.34379) 3497981 -dictGet test_01037.dict_array (32.2109,3.9018699999999997) 101 -dictGet test_01037.dict_array (32.2123,1.85233) 3497965 -dictGet test_01037.dict_array (32.2144,3.72534) 101 -dictGet test_01037.dict_array (32.2218,2.5386699999999998) 3497970 -dictGet test_01037.dict_array (32.2279,2.84267) 3497245 -dictGet test_01037.dict_array (32.2345,3.33295) 3501792 -dictGet test_01037.dict_array (32.2435,3.85283) 101 -dictGet test_01037.dict_array (32.2527,-0.480608) 101 -dictGet test_01037.dict_array (32.2566,-0.837882) 101 -dictGet test_01037.dict_array (32.2627,2.57708) 3497970 -dictGet test_01037.dict_array (32.2733,0.244931) 3498096 -dictGet test_01037.dict_array (32.2761,4.05808) 101 -dictGet test_01037.dict_array (32.2764,3.78472) 101 -dictGet test_01037.dict_array (32.2814,-1.26011) 101 -dictGet test_01037.dict_array (32.2861,3.02427) 3497245 -dictGet test_01037.dict_array (32.2924,0.928609) 3498004 -dictGet test_01037.dict_array (32.2963,-0.78543) 101 -dictGet test_01037.dict_array (32.3039,3.21175) 3501792 -dictGet test_01037.dict_array (32.3107,0.698287) 3498004 -dictGet test_01037.dict_array (32.3138,0.0595677) 3498106 -dictGet test_01037.dict_array (32.3339,0.707056) 3498004 -dictGet test_01037.dict_array (32.3351,0.415474) 3498106 -dictGet test_01037.dict_array (32.342,-0.681023) 101 -dictGet test_01037.dict_array (32.3463,1.83196) 3497126 -dictGet test_01037.dict_array (32.3494,2.43799) 3497114 -dictGet test_01037.dict_array (32.3524,3.47049) 3501822 -dictGet test_01037.dict_array (32.3531,2.33115) 3497114 -dictGet test_01037.dict_array (32.3602,0.116106) 3498106 -dictGet test_01037.dict_array (32.3612,1.1598) 3498004 -dictGet test_01037.dict_array (32.3689,3.34847) 3501822 -dictGet test_01037.dict_array (32.3695,0.734055) 3498004 -dictGet test_01037.dict_array (32.3825,3.85017) 101 -dictGet test_01037.dict_array (32.3835,-1.25491) 101 -dictGet test_01037.dict_array (32.4018,-0.728568) 101 -dictGet test_01037.dict_array (32.4044,2.96727) 3497245 -dictGet test_01037.dict_array (32.4101,2.9988) 3497245 -dictGet test_01037.dict_array (32.417,-1.12908) 101 -dictGet test_01037.dict_array (32.4172,4.1952) 101 -dictGet test_01037.dict_array (32.4239,2.49512) 3497245 -dictGet test_01037.dict_array (32.4258,4.05137) 101 -dictGet test_01037.dict_array (32.4264,-0.427357) 101 -dictGet test_01037.dict_array (32.4274,3.59377) 3501822 -dictGet test_01037.dict_array (32.4286,-1.24757) 101 -dictGet test_01037.dict_array (32.4294,3.0665) 3497245 -dictGet test_01037.dict_array (32.4333,-0.353347) 101 -dictGet test_01037.dict_array (32.4391,3.64421) 3501822 -dictGet test_01037.dict_array (32.4401,3.70635) 3501822 -dictGet test_01037.dict_array (32.45,1.68918) 3497126 -dictGet test_01037.dict_array (32.4507,-0.133471) 101 -dictGet test_01037.dict_array (32.4592,0.976458) 3498105 -dictGet test_01037.dict_array (32.4595,1.89135) 3497126 -dictGet test_01037.dict_array (32.4604,0.280248) 3498106 -dictGet test_01037.dict_array (32.4835,0.472731) 3498106 -dictGet test_01037.dict_array (32.4855,2.01938) 3497126 -dictGet test_01037.dict_array (32.4872,2.01697) 3497126 -dictGet test_01037.dict_array (32.4911,0.613106) 3498105 -dictGet test_01037.dict_array (32.4918,2.17834) 3497114 -dictGet test_01037.dict_array (32.4947,2.34595) 3497114 -dictGet test_01037.dict_array (32.5035,2.92234) 3497245 -dictGet test_01037.dict_array (32.5132,-0.331206) 101 -dictGet test_01037.dict_array (32.5156,-0.412604) 3501887 -dictGet test_01037.dict_array (32.5158,2.9067499999999997) 3497245 -dictGet test_01037.dict_array (32.5249,2.44519) 3497114 -dictGet test_01037.dict_array (32.5293,-0.790952) 101 -dictGet test_01037.dict_array (32.5319,3.96854) 101 -dictGet test_01037.dict_array (32.5518,3.6093) 3501822 -dictGet test_01037.dict_array (32.5541,3.5225400000000002) 3501822 -dictGet test_01037.dict_array (32.5569,0.816123) 3498105 -dictGet test_01037.dict_array (32.5646,1.9775) 3497126 -dictGet test_01037.dict_array (32.5733,3.81271) 101 -dictGet test_01037.dict_array (32.5767,0.948327) 3498105 -dictGet test_01037.dict_array (32.5971,1.76179) 3497126 -dictGet test_01037.dict_array (32.6035,-0.716157) 101 -dictGet test_01037.dict_array (32.6087,4.21614) 101 -dictGet test_01037.dict_array (32.6171,0.024481) 101 -dictGet test_01037.dict_array (32.6189,-0.775391) 101 -dictGet test_01037.dict_array (32.6198,2.92081) 3497167 -dictGet test_01037.dict_array (32.621,-0.970784) 101 -dictGet test_01037.dict_array (32.6266,0.650009) 3498105 -dictGet test_01037.dict_array (32.6315,2.15144) 3497126 -dictGet test_01037.dict_array (32.6385,-0.436803) 101 -dictGet test_01037.dict_array (32.6449,-0.191292) 101 -dictGet test_01037.dict_array (32.6535,2.10385) 3497126 -dictGet test_01037.dict_array (32.6592,3.49973) 3501822 -dictGet test_01037.dict_array (32.6598,2.5980600000000003) 3497114 -dictGet test_01037.dict_array (32.6612,2.95681) 3497167 -dictGet test_01037.dict_array (32.6636,-0.57235) 101 -dictGet test_01037.dict_array (32.669,-0.382702) 101 -dictGet test_01037.dict_array (32.6752,1.30748) 3497981 -dictGet test_01037.dict_array (32.6811,2.9559800000000003) 3497167 -dictGet test_01037.dict_array (32.6821,0.57336) 3498105 -dictGet test_01037.dict_array (32.6828,3.91304) 101 -dictGet test_01037.dict_array (32.6979,3.96868) 101 -dictGet test_01037.dict_array (32.6983,3.15784) 3497167 -dictGet test_01037.dict_array (32.7122,0.794293) 3498105 -dictGet test_01037.dict_array (32.7131,-0.847256) 101 -dictGet test_01037.dict_array (32.7219,0.883461) 3498105 -dictGet test_01037.dict_array (32.7228,1.78808) 3497126 -dictGet test_01037.dict_array (32.7273,-0.206908) 101 -dictGet test_01037.dict_array (32.7292,0.259331) 3501889 -dictGet test_01037.dict_array (32.7304,-1.38317) 101 -dictGet test_01037.dict_array (32.7353,1.01601) 3498105 -dictGet test_01037.dict_array (32.7354,4.17574) 101 -dictGet test_01037.dict_array (32.7357,-0.190194) 101 -dictGet test_01037.dict_array (32.7465,-1.37598) 101 -dictGet test_01037.dict_array (32.7494,-0.275675) 101 -dictGet test_01037.dict_array (32.7514,0.128951) 3501889 -dictGet test_01037.dict_array (32.753,3.44207) 3501822 -dictGet test_01037.dict_array (32.7686,2.11713) 3497126 -dictGet test_01037.dict_array (32.7694,1.47159) 3497388 -dictGet test_01037.dict_array (32.7768,0.0401042) 101 -dictGet test_01037.dict_array (32.781,-1.34283) 101 -dictGet test_01037.dict_array (32.7814,1.73876) 3497388 -dictGet test_01037.dict_array (32.7856,-1.06363) 101 -dictGet test_01037.dict_array (32.792699999999996,-1.1255600000000001) 101 -dictGet test_01037.dict_array (32.7941,-0.645447) 101 -dictGet test_01037.dict_array (32.7946,1.48889) 3497388 -dictGet test_01037.dict_array (32.797,0.791753) 3501889 -dictGet test_01037.dict_array (32.7982,-0.537798) 101 -dictGet test_01037.dict_array (32.8091,2.3611) 3490438 -dictGet test_01037.dict_array (32.81,1.7130800000000002) 3497388 -dictGet test_01037.dict_array (32.8174,-0.288322) 101 -dictGet test_01037.dict_array (32.823,1.6546699999999999) 3497388 -dictGet test_01037.dict_array (32.8233,1.62108) 3497388 -dictGet test_01037.dict_array (32.8428,-0.400045) 101 -dictGet test_01037.dict_array (32.8479,2.13598) 3490438 -dictGet test_01037.dict_array (32.8524,0.199902) 3501889 -dictGet test_01037.dict_array (32.8543,3.23553) 3501820 -dictGet test_01037.dict_array (32.8562,1.31371) 3498117 -dictGet test_01037.dict_array (32.87,1.44256) 3498117 -dictGet test_01037.dict_array (32.8789,2.38192) 3490438 -dictGet test_01037.dict_array (32.8812,2.20734) 3497128 -dictGet test_01037.dict_array (32.8815,-0.54427) 101 -dictGet test_01037.dict_array (32.8853,2.4859) 3497128 -dictGet test_01037.dict_array (32.8909,0.513964) 3501889 -dictGet test_01037.dict_array (32.9035,2.38999) 3490438 -dictGet test_01037.dict_array (32.9097,2.48131) 3497128 -dictGet test_01037.dict_array (32.928,-0.943269) 101 -dictGet test_01037.dict_array (32.9322,1.13165) 3498104 -dictGet test_01037.dict_array (32.9348,1.22606) 3498117 -dictGet test_01037.dict_array (32.9417,3.77998) 3501822 -dictGet test_01037.dict_array (32.9428,3.11936) 3497167 -dictGet test_01037.dict_array (32.9482,1.18092) 3498118 -dictGet test_01037.dict_array (32.9506,0.0609364) 101 -dictGet test_01037.dict_array (32.953,-0.828308) 101 -dictGet test_01037.dict_array (32.9593,3.5209099999999998) 3501822 -dictGet test_01037.dict_array (32.9617,2.07711) 3497128 -dictGet test_01037.dict_array (32.966,0.693749) 3498104 -dictGet test_01037.dict_array (32.9668,-0.716432) 101 -dictGet test_01037.dict_array (32.9702,1.98555) 3497127 -dictGet test_01037.dict_array (32.9782,1.73819) 3497388 -dictGet test_01037.dict_array (32.9805,3.71151) 3501822 -dictGet test_01037.dict_array (32.9821,2.97225) 3497167 -dictGet test_01037.dict_array (32.995,-0.830301) 101 -dictGet test_01037.dict_array (33.0234,0.770848) 3498104 -dictGet test_01037.dict_array (33.0312,-0.340964) 101 -dictGet test_01037.dict_array (33.0366,-0.756795) 101 -dictGet test_01037.dict_array (33.0438,0.812871) 3498118 -dictGet test_01037.dict_array (33.0455,1.84843) 3497127 -dictGet test_01037.dict_array (33.0498,0.0913292) 101 -dictGet test_01037.dict_array (33.0506,1.53739) 3497364 -dictGet test_01037.dict_array (33.0554,2.4265) 3497363 -dictGet test_01037.dict_array (33.0741,3.61332) 3501822 -dictGet test_01037.dict_array (33.0765,-0.179985) 101 -dictGet test_01037.dict_array (33.087,1.46465) 3497399 -dictGet test_01037.dict_array (33.0906,-0.620383) 101 -dictGet test_01037.dict_array (33.1047,-1.28027) 101 -dictGet test_01037.dict_array (33.1072,1.96303) 3497127 -dictGet test_01037.dict_array (33.1081,-0.897874) 101 -dictGet test_01037.dict_array (33.1122,1.8950200000000001) 3497127 -dictGet test_01037.dict_array (33.1237,2.63993) 3497165 -dictGet test_01037.dict_array (33.1238,0.753963) 3498118 -dictGet test_01037.dict_array (33.1257,0.495668) 3498102 -dictGet test_01037.dict_array (33.1258,1.78341) 3497364 -dictGet test_01037.dict_array (33.127,2.59646) 3497166 -dictGet test_01037.dict_array (33.1324,-1.23742) 101 -dictGet test_01037.dict_array (33.1359,3.83491) 101 -dictGet test_01037.dict_array (33.1628,-0.379588) 101 -dictGet test_01037.dict_array (33.1679,1.25601) 3498117 -dictGet test_01037.dict_array (33.1688,-1.35553) 101 -dictGet test_01037.dict_array (33.181,2.10943) 3497363 -dictGet test_01037.dict_array (33.1871,2.81171) 3497165 -dictGet test_01037.dict_array (33.1877,0.771297) 3498118 -dictGet test_01037.dict_array (33.1883,-0.204797) 101 -dictGet test_01037.dict_array (33.1886,3.27998) 3501820 -dictGet test_01037.dict_array (33.1955,0.708907) 3498118 -dictGet test_01037.dict_array (33.2044,-0.769275) 101 -dictGet test_01037.dict_array (33.2182,3.36103) 3501820 -dictGet test_01037.dict_array (33.2192,3.43586) 3501822 -dictGet test_01037.dict_array (33.2322,-0.916753) 101 -dictGet test_01037.dict_array (33.2359,-0.81321) 101 -dictGet test_01037.dict_array (33.238,0.635072) 3498111 -dictGet test_01037.dict_array (33.2398,3.02588) 3497165 -dictGet test_01037.dict_array (33.2469,2.35698) 3497363 -dictGet test_01037.dict_array (33.247,2.3327) 3497363 -dictGet test_01037.dict_array (33.2579,2.8027100000000003) 3497165 -dictGet test_01037.dict_array (33.2607,0.321082) 101 -dictGet test_01037.dict_array (33.2653,0.243336) 101 -dictGet test_01037.dict_array (33.2758,0.831836) 3498118 -dictGet test_01037.dict_array (33.2771,0.886536) 3498118 -dictGet test_01037.dict_array (33.2914,1.16026) 3498117 -dictGet test_01037.dict_array (33.2914,1.38882) 3497399 -dictGet test_01037.dict_array (33.2982,-1.16604) 101 -dictGet test_01037.dict_array (33.2985,0.842556) 3498112 -dictGet test_01037.dict_array (33.3005,2.8338900000000002) 3497165 -dictGet test_01037.dict_array (33.305,0.0969475) 101 -dictGet test_01037.dict_array (33.3072,3.82163) 101 -dictGet test_01037.dict_array (33.312,3.41475) 3501820 -dictGet test_01037.dict_array (33.3129,2.46048) 3497166 -dictGet test_01037.dict_array (33.3134,3.46863) 3501820 -dictGet test_01037.dict_array (33.3203,2.33139) 3497166 -dictGet test_01037.dict_array (33.324,0.433701) 101 -dictGet test_01037.dict_array (33.3338,2.44705) 3497166 -dictGet test_01037.dict_array (33.337,4.06475) 101 -dictGet test_01037.dict_array (33.3469,1.08172) 3498126 -dictGet test_01037.dict_array (33.3538,0.717896) 3498112 -dictGet test_01037.dict_array (33.3618,1.37899) 3497399 -dictGet test_01037.dict_array (33.3698,0.547744) 3501862 -dictGet test_01037.dict_array (33.3705,0.957619) 3498112 -dictGet test_01037.dict_array (33.3821,3.07258) 3497165 -dictGet test_01037.dict_array (33.3881,3.0626) 3497165 -dictGet test_01037.dict_array (33.393,-0.816186) 101 -dictGet test_01037.dict_array (33.3945,0.869508) 3498110 -dictGet test_01037.dict_array (33.4001,1.24186) 3498117 -dictGet test_01037.dict_array (33.4008,2.34911) 3497166 -dictGet test_01037.dict_array (33.4166,-1.2808899999999999) 101 -dictGet test_01037.dict_array (33.4167,3.0655) 3497165 -dictGet test_01037.dict_array (33.4204,2.81887) 3497165 -dictGet test_01037.dict_array (33.4211,1.71128) 3497400 -dictGet test_01037.dict_array (33.4237,2.91761) 3497165 -dictGet test_01037.dict_array (33.4266,1.5955599999999999) 3497399 -dictGet test_01037.dict_array (33.4353,-0.391392) 101 -dictGet test_01037.dict_array (33.4362,-0.134658) 101 -dictGet test_01037.dict_array (33.4386,0.15396) 101 -dictGet test_01037.dict_array (33.4421,-0.50712) 101 -dictGet test_01037.dict_array (33.452,0.915829) 3498126 -dictGet test_01037.dict_array (33.463,-0.0882717) 101 -dictGet test_01037.dict_array (33.464,-1.00949) 101 -dictGet test_01037.dict_array (33.4692,0.954092) 3498126 -dictGet test_01037.dict_array (33.4716,1.9538799999999998) 3497400 -dictGet test_01037.dict_array (33.4756,1.85836) 3497400 -dictGet test_01037.dict_array (33.4859,4.0751) 101 -dictGet test_01037.dict_array (33.4899,3.54193) 3501820 -dictGet test_01037.dict_array (33.4935,3.49794) 3501820 -dictGet test_01037.dict_array (33.494,-0.983356) 101 -dictGet test_01037.dict_array (33.4955,-1.28128) 101 -dictGet test_01037.dict_array (33.4965,-0.278687) 101 -dictGet test_01037.dict_array (33.4991,0.647491) 3498110 -dictGet test_01037.dict_array (33.5076,2.2272) 3497424 -dictGet test_01037.dict_array (33.5079,-0.498199) 101 -dictGet test_01037.dict_array (33.5157,0.535034) 3501862 -dictGet test_01037.dict_array (33.5171,2.49677) 3497166 -dictGet test_01037.dict_array (33.5255,2.4447200000000002) 3497166 -dictGet test_01037.dict_array (33.526,4.01194) 101 -dictGet test_01037.dict_array (33.5288,0.789434) 3498110 -dictGet test_01037.dict_array (33.5356,-1.17671) 101 -dictGet test_01037.dict_array (33.5402,1.49152) 3497399 -dictGet test_01037.dict_array (33.5418,3.45757) 3501820 -dictGet test_01037.dict_array (33.5428,1.90712) 3497400 -dictGet test_01037.dict_array (33.5556,-0.55741) 101 -dictGet test_01037.dict_array (33.5564,0.876858) 3498128 -dictGet test_01037.dict_array (33.5567,-0.10208) 101 -dictGet test_01037.dict_array (33.5645,-0.124824) 101 -dictGet test_01037.dict_array (33.5663,3.4872) 3501820 -dictGet test_01037.dict_array (33.5716,-0.0107611) 101 -dictGet test_01037.dict_array (33.578,3.55714) 3501820 -dictGet test_01037.dict_array (33.5826,-0.49076) 101 -dictGet test_01037.dict_array (33.5909,0.773737) 3498110 -dictGet test_01037.dict_array (33.5958,2.9619999999999997) 3497425 -dictGet test_01037.dict_array (33.6193,-0.919755) 101 -dictGet test_01037.dict_array (33.6313,0.652132) 3498110 -dictGet test_01037.dict_array (33.632,0.823351) 3498128 -dictGet test_01037.dict_array (33.66,2.18998) 3497424 -dictGet test_01037.dict_array (33.6621,0.535395) 3498135 -dictGet test_01037.dict_array (33.6726,3.19367) 3497438 -dictGet test_01037.dict_array (33.6912,1.74522) 3497400 -dictGet test_01037.dict_array (33.705,0.706397) 3498135 -dictGet test_01037.dict_array (33.7076,0.7622) 3498128 -dictGet test_01037.dict_array (33.7112,1.70187) 3497400 -dictGet test_01037.dict_array (33.7246,-1.14837) 101 -dictGet test_01037.dict_array (33.7326,2.62413) 3497425 -dictGet test_01037.dict_array (33.7332,2.82137) 3497425 -dictGet test_01037.dict_array (33.7434,0.394672) 3498135 -dictGet test_01037.dict_array (33.7443,1.54557) 3497398 -dictGet test_01037.dict_array (33.7506,1.57317) 3497398 -dictGet test_01037.dict_array (33.7526,1.8578999999999999) 3497424 -dictGet test_01037.dict_array (33.766,4.15013) 101 -dictGet test_01037.dict_array (33.7834,2.41789) 3497439 -dictGet test_01037.dict_array (33.7864,0.230935) 101 -dictGet test_01037.dict_array (33.7965,3.05709) 3497438 -dictGet test_01037.dict_array (33.7998,3.32881) 3497438 -dictGet test_01037.dict_array (33.8003,2.97338) 3497425 -dictGet test_01037.dict_array (33.8007,-1.08962) 101 -dictGet test_01037.dict_array (33.8022,-0.139488) 101 -dictGet test_01037.dict_array (33.8065,2.70857) 3497425 -dictGet test_01037.dict_array (33.8169,-0.607788) 101 -dictGet test_01037.dict_array (33.8203,0.108512) 3501863 -dictGet test_01037.dict_array (33.8231,-1.03449) 101 -dictGet test_01037.dict_array (33.8312,3.49458) 3501829 -dictGet test_01037.dict_array (33.8342,0.297518) 3501863 -dictGet test_01037.dict_array (33.8352,0.165872) 101 -dictGet test_01037.dict_array (33.8354,1.87277) 3497424 -dictGet test_01037.dict_array (33.8371,1.60103) 3497398 -dictGet test_01037.dict_array (33.8387,1.9968) 3497424 -dictGet test_01037.dict_array (33.8403,3.5805) 3501829 -dictGet test_01037.dict_array (33.8414,-0.703067) 101 -dictGet test_01037.dict_array (33.844,-0.179472) 101 -dictGet test_01037.dict_array (33.8468,3.40137) 3501829 -dictGet test_01037.dict_array (33.8509,4.15334) 101 -dictGet test_01037.dict_array (33.8539,2.38339) 3497439 -dictGet test_01037.dict_array (33.858,-1.3122500000000001) 101 -dictGet test_01037.dict_array (33.859,3.72626) 3501829 -dictGet test_01037.dict_array (33.8616,2.24433) 3497424 -dictGet test_01037.dict_array (33.8621,3.01035) 3497438 -dictGet test_01037.dict_array (33.8623,1.17559) 3498129 -dictGet test_01037.dict_array (33.8682,2.706) 3497425 -dictGet test_01037.dict_array (33.8684,0.189231) 3501863 -dictGet test_01037.dict_array (33.872,1.93574) 3497424 -dictGet test_01037.dict_array (33.8844,3.80404) 3501829 -dictGet test_01037.dict_array (33.8888,0.594884) 3498135 -dictGet test_01037.dict_array (33.8946,2.74161) 3497438 -dictGet test_01037.dict_array (33.9023,0.6239) 3498135 -dictGet test_01037.dict_array (33.9057,0.873222) 3498136 -dictGet test_01037.dict_array (33.9157,-1.26607) 101 -dictGet test_01037.dict_array (33.92,2.06848) 3497397 -dictGet test_01037.dict_array (33.9298,-0.00526229) 101 -dictGet test_01037.dict_array (33.932,3.07063) 3497438 -dictGet test_01037.dict_array (33.9322,0.629385) 3501864 -dictGet test_01037.dict_array (33.9367,-1.41955) 101 -dictGet test_01037.dict_array (33.937,1.42532) 3498173 -dictGet test_01037.dict_array (33.9375,1.1467100000000001) 3498159 -dictGet test_01037.dict_array (33.9434,-1.05739) 101 -dictGet test_01037.dict_array (33.9477,3.34809) 3501829 -dictGet test_01037.dict_array (33.95,2.21715) 3497397 -dictGet test_01037.dict_array (33.955799999999996,0.305176) 3501859 -dictGet test_01037.dict_array (33.9686,-0.28273) 101 -dictGet test_01037.dict_array (33.9703,4.1255) 3501829 -dictGet test_01037.dict_array (33.9707,3.08199) 3497438 -dictGet test_01037.dict_array (33.9754,1.06203) 3498159 -dictGet test_01037.dict_array (33.9757,3.72468) 3501829 -dictGet test_01037.dict_array (33.9775,-0.0440599) 101 -dictGet test_01037.dict_array (33.9777,-0.251484) 101 -dictGet test_01037.dict_array (33.9789,-0.339374) 101 -dictGet test_01037.dict_array (33.9849,2.54515) 3497425 -dictGet test_01037.dict_array (33.9885,-0.318557) 101 -dictGet test_01037.dict_array (33.9977,1.07175) 3498159 -dictGet test_01037.dict_array (33.9984,-0.700517) 101 -dictGet test_01037.dict_array (34.0149,3.53338) 3501829 -dictGet test_01037.dict_array (34.0173,3.39155) 3501829 -dictGet test_01037.dict_array (34.0317,3.9579) 3501829 -dictGet test_01037.dict_array (34.0369,3.83612) 3501829 -dictGet test_01037.dict_array (34.043,-0.0887221) 101 -dictGet test_01037.dict_array (34.0487,1.14252) 3498159 -dictGet test_01037.dict_array (34.052,1.74832) 3497397 -dictGet test_01037.dict_array (34.0711,-0.898071) 101 -dictGet test_01037.dict_array (34.0747,1.55057) 3498173 -dictGet test_01037.dict_array (34.0803,3.16763) 3497438 -dictGet test_01037.dict_array (34.0872,3.75555) 3501829 -dictGet test_01037.dict_array (34.0965,1.62038) 3498173 -dictGet test_01037.dict_array (34.0977,-0.412691) 101 -dictGet test_01037.dict_array (34.0986,0.0294206) 101 -dictGet test_01037.dict_array (34.1072,3.15823) 3497438 -dictGet test_01037.dict_array (34.1092,3.09599) 3497438 -dictGet test_01037.dict_array (34.1206,1.04637) 3498160 -dictGet test_01037.dict_array (34.1209,3.13826) 3497438 -dictGet test_01037.dict_array (34.1265,3.95881) 3501829 -dictGet test_01037.dict_array (34.1286,-0.539319) 101 -dictGet test_01037.dict_array (34.1358,3.67451) 3501829 -dictGet test_01037.dict_array (34.1428,0.136115) 101 -dictGet test_01037.dict_array (34.157,1.73522) 3497397 -dictGet test_01037.dict_array (34.1581,1.48001) 3498172 -dictGet test_01037.dict_array (34.1682,3.42373) 3501829 -dictGet test_01037.dict_array (34.1683,-1.26511) 101 -dictGet test_01037.dict_array (34.1684,4.20007) 101 -dictGet test_01037.dict_array (34.1854,3.32089) 3501829 -dictGet test_01037.dict_array (34.2022,0.749536) 3501864 -dictGet test_01037.dict_array (34.2044,3.04865) 3497438 -dictGet test_01037.dict_array (34.22,-0.500055) 101 -dictGet test_01037.dict_array (34.2249,0.743775) 3501864 -dictGet test_01037.dict_array (34.2254,1.34702) 3498172 -dictGet test_01037.dict_array (34.2355,-0.898843) 101 -dictGet test_01037.dict_array (34.2394,2.0203699999999998) 3497439 -dictGet test_01037.dict_array (34.2466,1.83785) 3498251 -dictGet test_01037.dict_array (34.247,4.09563) 101 -dictGet test_01037.dict_array (34.2508,2.61312) 3497439 -dictGet test_01037.dict_array (34.2517,1.69642) 3498251 -dictGet test_01037.dict_array (34.2564,4.13033) 101 -dictGet test_01037.dict_array (34.2574,4.18928) 101 -dictGet test_01037.dict_array (34.2614,-0.478719) 101 -dictGet test_01037.dict_array (34.2625,2.38088) 3497439 -dictGet test_01037.dict_array (34.2666,3.1503) 3501829 -dictGet test_01037.dict_array (34.271,4.02223) 101 -dictGet test_01037.dict_array (34.2727,0.514755) 101 -dictGet test_01037.dict_array (34.278,1.98929) 3497439 -dictGet test_01037.dict_array (34.2798,-0.199208) 101 -dictGet test_01037.dict_array (34.2804,2.05184) 3497439 -dictGet test_01037.dict_array (34.2945,-1.11051) 101 -dictGet test_01037.dict_array (34.3168,-0.0829721) 101 -dictGet test_01037.dict_array (34.3345,3.4358) 3501829 -dictGet test_01037.dict_array (34.3377,1.13527) 3498162 -dictGet test_01037.dict_array (34.3383,1.27891) 3498161 -dictGet test_01037.dict_array (34.3391,1.47945) 3498161 -dictGet test_01037.dict_array (34.3441,0.627014) 101 -dictGet test_01037.dict_array (34.347,2.4853) 3497439 -dictGet test_01037.dict_array (34.3514,2.16247) 3497439 -dictGet test_01037.dict_array (34.3627,2.64533) 3497439 -dictGet test_01037.dict_array (34.3682,-0.227501) 101 -dictGet test_01037.dict_array (34.3756,4.21248) 101 -dictGet test_01037.dict_array (34.379,3.96604) 101 -dictGet test_01037.dict_array (34.3827,1.7518) 3498251 -dictGet test_01037.dict_array (34.3912,2.8834) 3501830 -dictGet test_01037.dict_array (34.3919,0.668829) 101 -dictGet test_01037.dict_array (34.3949,2.00338) 3497439 -dictGet test_01037.dict_array (34.3987,0.557268) 101 -dictGet test_01037.dict_array (34.4111,0.768558) 101 -dictGet test_01037.dict_array (34.4119,2.8742) 3501830 -dictGet test_01037.dict_array (34.416,3.50841) 3501829 -dictGet test_01037.dict_array (34.4212,1.24916) 3498161 -dictGet test_01037.dict_array (34.4251,0.457029) 101 -dictGet test_01037.dict_array (34.4274,-0.902559) 101 -dictGet test_01037.dict_array (34.4325,4.03159) 101 -dictGet test_01037.dict_array (34.438,1.63994) 3498251 -dictGet test_01037.dict_array (34.4403,-0.177594) 101 -dictGet test_01037.dict_array (34.4421,0.726712) 101 -dictGet test_01037.dict_array (34.4517,2.98611) 3501830 -dictGet test_01037.dict_array (34.4658,-1.312) 101 -dictGet test_01037.dict_array (34.4732,-0.0681338) 101 -dictGet test_01037.dict_array (34.4752,2.81646) 3501830 -dictGet test_01037.dict_array (34.4914,2.3858) 3497439 -dictGet test_01037.dict_array (34.4923,0.855231) 101 -dictGet test_01037.dict_array (34.5235,1.78468) 3498251 -dictGet test_01037.dict_array (34.5305,4.10608) 101 -dictGet test_01037.dict_array (34.5389,0.621937) 101 -dictGet test_01037.dict_array (34.5406,3.17145) 101 -dictGet test_01037.dict_array (34.5434,-0.56306) 101 -dictGet test_01037.dict_array (34.5449,3.13311) 3501829 -dictGet test_01037.dict_array (34.5491,2.31572) 3497439 -dictGet test_01037.dict_array (34.5539,2.94028) 3501830 -dictGet test_01037.dict_array (34.5546,-0.208825) 101 -dictGet test_01037.dict_array (34.5549,3.78486) 101 -dictGet test_01037.dict_array (34.5676,0.307148) 101 -dictGet test_01037.dict_array (34.5743,1.5217399999999999) 3501838 -dictGet test_01037.dict_array (34.5775,3.48046) 101 -dictGet test_01037.dict_array (34.5815,2.5243700000000002) 3501830 -dictGet test_01037.dict_array (34.5841,4.21191) 101 -dictGet test_01037.dict_array (34.5887,2.65083) 3501830 -dictGet test_01037.dict_array (34.5937,3.2143) 101 -dictGet test_01037.dict_array (34.6013,-1.0612) 101 -dictGet test_01037.dict_array (34.6089,1.36066) 3501838 -dictGet test_01037.dict_array (34.6103,3.40227) 101 -dictGet test_01037.dict_array (34.6128,1.92276) 3498251 -dictGet test_01037.dict_array (34.6175,2.43627) 3498251 -dictGet test_01037.dict_array (34.6209,3.43776) 101 -dictGet test_01037.dict_array (34.6234,2.60237) 3501830 -dictGet test_01037.dict_array (34.6275,3.52479) 101 -dictGet test_01037.dict_array (34.635,0.568558) 101 -dictGet test_01037.dict_array (34.6373,2.37692) 3498251 -dictGet test_01037.dict_array (34.6375,3.52234) 101 -dictGet test_01037.dict_array (34.6426,2.12397) 3498251 -dictGet test_01037.dict_array (34.6513,2.80915) 3501830 -dictGet test_01037.dict_array (34.6632,2.30039) 3498251 -dictGet test_01037.dict_array (34.6691,1.86582) 3498251 -dictGet test_01037.dict_array (34.6739,0.15342) 101 -dictGet test_01037.dict_array (34.6825,0.0499679) 101 -dictGet test_01037.dict_array (34.6893,0.454326) 101 -dictGet test_01037.dict_array (34.6957,-0.358598) 101 -dictGet test_01037.dict_array (34.6986,0.562679) 101 -dictGet test_01037.dict_array (34.712,1.12114) 101 -dictGet test_01037.dict_array (34.7126,-0.0057301) 101 -dictGet test_01037.dict_array (34.7137,0.0248501) 101 -dictGet test_01037.dict_array (34.7162,1.15623) 101 -dictGet test_01037.dict_array (34.7258,3.95142) 101 -dictGet test_01037.dict_array (34.7347,3.5232099999999997) 101 -dictGet test_01037.dict_array (34.7363,2.23374) 3501830 -dictGet test_01037.dict_array (34.7375,0.397841) 101 -dictGet test_01037.dict_array (34.7423,3.09198) 101 -dictGet test_01037.dict_array (34.7452,3.09029) 101 -dictGet test_01037.dict_array (34.7539,-1.06943) 101 -dictGet test_01037.dict_array (34.7733,-0.00912717) 101 -dictGet test_01037.dict_array (34.774,2.71088) 3501830 -dictGet test_01037.dict_array (34.7771,1.46009) 3501835 -dictGet test_01037.dict_array (34.7782,-1.28308) 101 -dictGet test_01037.dict_array (34.7924,3.63564) 101 -dictGet test_01037.dict_array (34.7939,-0.416676) 101 -dictGet test_01037.dict_array (34.7964,-0.401773) 101 -dictGet test_01037.dict_array (34.7974,0.0286873) 101 -dictGet test_01037.dict_array (34.7975,3.05965) 101 -dictGet test_01037.dict_array (34.8037,3.07263) 101 -dictGet test_01037.dict_array (34.8254,-0.390284) 101 -dictGet test_01037.dict_array (34.828,1.91869) 3498251 -dictGet test_01037.dict_array (34.8289,3.71058) 101 -dictGet test_01037.dict_array (34.8403,2.14606) 3501835 -dictGet test_01037.dict_array (34.8437,2.20617) 3501830 -dictGet test_01037.dict_array (34.8469,2.38435) 3501830 -dictGet test_01037.dict_array (34.86,1.45705) 101 -dictGet test_01037.dict_array (34.8612,0.914248) 101 -dictGet test_01037.dict_array (34.8663,3.4215400000000002) 101 -dictGet test_01037.dict_array (34.8724,-0.375144) 101 -dictGet test_01037.dict_array (34.8795,3.29317) 101 -dictGet test_01037.dict_array (34.8823,1.21988) 101 -dictGet test_01037.dict_array (34.8834,1.07657) 101 -dictGet test_01037.dict_array (34.8837,0.157648) 101 -dictGet test_01037.dict_array (34.8871,-0.9755) 101 -dictGet test_01037.dict_array (34.8871,1.8943699999999999) 3501835 -dictGet test_01037.dict_array (34.889,3.36756) 101 -dictGet test_01037.dict_array (34.8907,1.24874) 101 -dictGet test_01037.dict_array (34.8965,3.13508) 101 -dictGet test_01037.dict_array (34.9042,2.62092) 101 -dictGet test_01037.dict_array (34.9055,-0.0448967) 101 -dictGet test_01037.dict_array (34.9122,0.110576) 101 -dictGet test_01037.dict_array (34.9228,3.60183) 101 -dictGet test_01037.dict_array (34.9237,1.21715) 101 -dictGet test_01037.dict_array (34.9296,1.70459) 3501835 -dictGet test_01037.dict_array (34.941,-1.14663) 101 -dictGet test_01037.dict_array (34.9448,1.18923) 101 -dictGet test_01037.dict_array (34.9462,3.81678) 101 -dictGet test_01037.dict_array (34.9466,0.593463) 101 -dictGet test_01037.dict_array (34.9485,0.150307) 101 -dictGet test_01037.dict_array (34.9542,0.487238) 101 -dictGet test_01037.dict_array (34.9559,2.03473) 3501835 -dictGet test_01037.dict_array (34.9671,-0.960225) 101 -dictGet test_01037.dict_array (34.9711,2.63444) 101 -dictGet test_01037.dict_array (34.9892,0.354775) 101 -dictGet test_01037.dict_array (34.9907,1.40724) 101 -dictGet test_01037.dict_array (34.9916,-0.00173097) 101 -dictGet test_01037.dict_array (34.9919,2.06167) 101 +dictGet dict_array (29.5699,2.50068) 101 +dictGet dict_array (29.5796,1.55456) 101 +dictGet dict_array (29.5796,2.36864) 101 +dictGet dict_array (29.5844,1.59626) 101 +dictGet dict_array (29.5886,4.03321) 101 +dictGet dict_array (29.5914,3.02628) 101 +dictGet dict_array (29.5926,-0.0965169) 101 +dictGet dict_array (29.5968,2.37773) 101 +dictGet dict_array (29.5984,0.755853) 101 +dictGet dict_array (29.6066,3.47173) 101 +dictGet dict_array (29.6085,-1.26007) 6489978 +dictGet dict_array (29.6131,0.246565) 101 +dictGet dict_array (29.6157,-0.266687) 101 +dictGet dict_array (29.6164,2.94674) 101 +dictGet dict_array (29.6195,-0.591941) 101 +dictGet dict_array (29.6231,1.54818) 101 +dictGet dict_array (29.6379,0.764114) 101 +dictGet dict_array (29.6462,-0.772059) 934530 +dictGet dict_array (29.6579,-1.07336) 6489978 +dictGet dict_array (29.6618,-0.271842) 101 +dictGet dict_array (29.6629,-0.303602) 101 +dictGet dict_array (29.6659,-0.782823) 934530 +dictGet dict_array (29.6736,-0.113832) 101 +dictGet dict_array (29.6759,3.02905) 101 +dictGet dict_array (29.6778,3.71898) 101 +dictGet dict_array (29.6796,1.10433) 101 +dictGet dict_array (29.6809,2.13677) 101 +dictGet dict_array (29.6935,4.11894) 101 +dictGet dict_array (29.6991,-1.4458199999999999) 101 +dictGet dict_array (29.6997,3.17297) 101 +dictGet dict_array (29.7043,3.6145899999999997) 101 +dictGet dict_array (29.7065,3.24885) 101 +dictGet dict_array (29.7126,0.28108) 101 +dictGet dict_array (29.7192,0.174273) 101 +dictGet dict_array (29.7217,-0.523481) 3501900 +dictGet dict_array (29.7271,1.67967) 101 +dictGet dict_array (29.7311,4.12444) 101 +dictGet dict_array (29.7347,1.88378) 101 +dictGet dict_array (29.7358,0.67944) 101 +dictGet dict_array (29.7366,-0.2973) 101 +dictGet dict_array (29.7446,0.646536) 101 +dictGet dict_array (29.7453,-0.567963) 3501900 +dictGet dict_array (29.764,4.04217) 101 +dictGet dict_array (29.7655,1.51372) 101 +dictGet dict_array (29.7744,1.12435) 101 +dictGet dict_array (29.7774,-0.0681196) 3501895 +dictGet dict_array (29.7784,1.54864) 101 +dictGet dict_array (29.7785,2.24139) 101 +dictGet dict_array (29.7922,0.220808) 101 +dictGet dict_array (29.7936,2.37709) 101 +dictGet dict_array (29.8008,0.948536) 101 +dictGet dict_array (29.8115,0.201227) 101 +dictGet dict_array (29.814,0.149601) 3501895 +dictGet dict_array (29.8193,-1.35858) 101 +dictGet dict_array (29.8201,0.965518) 101 +dictGet dict_array (29.8265,-0.727286) 3501900 +dictGet dict_array (29.8277,-0.531746) 3501900 +dictGet dict_array (29.8289,3.63009) 101 +dictGet dict_array (29.8548,0.838047) 101 +dictGet dict_array (29.8641,-0.845265) 3501900 +dictGet dict_array (29.8649,0.0562212) 3501895 +dictGet dict_array (29.8701,-1.02045) 934530 +dictGet dict_array (29.8733,2.76654) 101 +dictGet dict_array (29.876,0.555475) 101 +dictGet dict_array (29.8794,-0.800108) 3501900 +dictGet dict_array (29.8813,2.7426399999999997) 101 +dictGet dict_array (29.897100000000002,2.66193) 101 +dictGet dict_array (29.908,4.01339) 101 +dictGet dict_array (29.9165,-1.08246) 3501894 +dictGet dict_array (29.9201,-0.420861) 3498054 +dictGet dict_array (29.9217,3.03778) 101 +dictGet dict_array (29.9355,0.773833) 101 +dictGet dict_array (29.947,3.76517) 101 +dictGet dict_array (29.9518,-0.60557) 3498056 +dictGet dict_array (29.9564,-0.600163) 3498056 +dictGet dict_array (29.959600000000002,4.16591) 101 +dictGet dict_array (29.9615,-1.33708) 3501894 +dictGet dict_array (29.9699,-0.392375) 3498054 +dictGet dict_array (29.9776,1.04552) 101 +dictGet dict_array (29.9784,4.02756) 101 +dictGet dict_array (29.9819,4.00597) 101 +dictGet dict_array (29.9826,1.2816100000000001) 101 +dictGet dict_array (30.0026,2.76257) 101 +dictGet dict_array (30.0126,3.68255) 101 +dictGet dict_array (30.0131,0.796576) 3501892 +dictGet dict_array (30.018,1.16523) 101 +dictGet dict_array (30.0261,-0.210653) 3501896 +dictGet dict_array (30.0472,-1.11007) 3501894 +dictGet dict_array (30.0542,-0.479585) 3498054 +dictGet dict_array (30.0613,1.6278000000000001) 101 +dictGet dict_array (30.0617,-0.0551152) 3501895 +dictGet dict_array (30.0637,2.62066) 101 +dictGet dict_array (30.0721,1.6424400000000001) 101 +dictGet dict_array (30.0769,-0.402636) 3498054 +dictGet dict_array (30.0791,-0.277435) 3501896 +dictGet dict_array (30.0931,0.0327512) 3501895 +dictGet dict_array (30.1059,3.52623) 101 +dictGet dict_array (30.1103,0.865466) 3501892 +dictGet dict_array (30.1115,2.95243) 101 +dictGet dict_array (30.1144,1.71029) 101 +dictGet dict_array (30.1311,-0.864751) 3501899 +dictGet dict_array (30.1336,-0.851386) 3501899 +dictGet dict_array (30.1393,3.89901) 101 +dictGet dict_array (30.1456,-0.531898) 3498054 +dictGet dict_array (30.1492,2.07833) 101 +dictGet dict_array (30.1575,2.43856) 101 +dictGet dict_array (30.1682,1.19771) 101 +dictGet dict_array (30.1716,3.9853300000000003) 101 +dictGet dict_array (30.1849,2.78374) 101 +dictGet dict_array (30.1866,0.65658) 3498021 +dictGet dict_array (30.1885,1.56943) 101 +dictGet dict_array (30.1959,-1.38202) 101 +dictGet dict_array (30.1999,1.58413) 101 +dictGet dict_array (30.2024,0.713081) 3498021 +dictGet dict_array (30.2054,0.620143) 3498021 +dictGet dict_array (30.2091,1.51641) 101 +dictGet dict_array (30.2124,-0.331782) 3498031 +dictGet dict_array (30.226,3.03527) 101 +dictGet dict_array (30.2261,3.18486) 101 +dictGet dict_array (30.2288,2.48407) 101 +dictGet dict_array (30.2345,3.7462400000000002) 101 +dictGet dict_array (30.2375,0.62046) 3498021 +dictGet dict_array (30.2425,-0.472914) 3498054 +dictGet dict_array (30.247,3.95863) 101 +dictGet dict_array (30.2494,-0.305093) 3498031 +dictGet dict_array (30.2499,2.54337) 101 +dictGet dict_array (30.2606,2.16644) 101 +dictGet dict_array (30.2672,3.94847) 101 +dictGet dict_array (30.2709,-0.136264) 6088794 +dictGet dict_array (30.2764,1.18654) 101 +dictGet dict_array (30.2765,1.20383) 101 +dictGet dict_array (30.2839,1.05762) 3498024 +dictGet dict_array (30.286,0.469327) 3498021 +dictGet dict_array (30.2927,3.1693) 101 +dictGet dict_array (30.2935,3.49854) 101 +dictGet dict_array (30.307,0.312338) 3498021 +dictGet dict_array (30.3085,1.07791) 3498024 +dictGet dict_array (30.3139,2.77248) 101 +dictGet dict_array (30.314,0.822823) 3498024 +dictGet dict_array (30.3227,-0.587351) 3498055 +dictGet dict_array (30.332,1.00174) 3498024 +dictGet dict_array (30.3388,0.844148) 3498024 +dictGet dict_array (30.3485,0.561902) 3498021 +dictGet dict_array (30.3497,0.180362) 6489998 +dictGet dict_array (30.361,4.13016) 101 +dictGet dict_array (30.3623,-0.0484027) 6489998 +dictGet dict_array (30.3638,3.9845800000000002) 101 +dictGet dict_array (30.3853,3.16051) 101 +dictGet dict_array (30.3974,2.6617800000000003) 101 +dictGet dict_array (30.4002,-1.15886) 101 +dictGet dict_array (30.4008,-0.387015) 3498031 +dictGet dict_array (30.4018,1.86493) 101 +dictGet dict_array (30.4239,1.16818) 3498024 +dictGet dict_array (30.4363,3.63938) 101 +dictGet dict_array (30.4377,-0.81315) 3498063 +dictGet dict_array (30.4391,3.54703) 101 +dictGet dict_array (30.4424,-1.39435) 101 +dictGet dict_array (30.4441,2.8463000000000003) 101 +dictGet dict_array (30.4517,3.28117) 101 +dictGet dict_array (30.4658,2.6928) 101 +dictGet dict_array (30.4734,2.66161) 101 +dictGet dict_array (30.4799,-1.07578) 101 +dictGet dict_array (30.4837,-1.02486) 3501899 +dictGet dict_array (30.485,1.06326) 3498024 +dictGet dict_array (30.495,1.12306) 101 +dictGet dict_array (30.501,2.27264) 101 +dictGet dict_array (30.5027,1.99382) 101 +dictGet dict_array (30.5194,-1.03943) 3501893 +dictGet dict_array (30.5239,1.04328) 101 +dictGet dict_array (30.528,3.82041) 101 +dictGet dict_array (30.5299,-0.715248) 3498063 +dictGet dict_array (30.5331,1.19603) 101 +dictGet dict_array (30.535800000000002,2.71485) 101 +dictGet dict_array (30.5405,0.804694) 3498023 +dictGet dict_array (30.542,1.23739) 101 +dictGet dict_array (30.5432,4.04189) 101 +dictGet dict_array (30.5457,-0.956121) 3501893 +dictGet dict_array (30.5506,3.07443) 101 +dictGet dict_array (30.5539,3.87084) 101 +dictGet dict_array (30.5578,3.78837) 101 +dictGet dict_array (30.5588,0.966135) 3498022 +dictGet dict_array (30.5637,2.5605) 101 +dictGet dict_array (30.5647,-1.27328) 101 +dictGet dict_array (30.5656,-0.0581332) 6088794 +dictGet dict_array (30.5715,0.65755) 3498023 +dictGet dict_array (30.5727,3.01604) 101 +dictGet dict_array (30.5729,-0.976857) 3501893 +dictGet dict_array (30.5751,0.60204) 3498023 +dictGet dict_array (30.5854,3.02473) 101 +dictGet dict_array (30.5866,0.174099) 6489998 +dictGet dict_array (30.5947,0.875193) 3498023 +dictGet dict_array (30.5992,-0.403901) 3498063 +dictGet dict_array (30.6002,4.18891) 101 +dictGet dict_array (30.6025,0.217712) 6489998 +dictGet dict_array (30.6054,0.927203) 3498022 +dictGet dict_array (30.6075,3.79359) 101 +dictGet dict_array (30.6159,3.82773) 101 +dictGet dict_array (30.627,3.84039) 101 +dictGet dict_array (30.6308,0.77517) 3498023 +dictGet dict_array (30.6338,0.179565) 6489998 +dictGet dict_array (30.6461,1.3293599999999999) 101 +dictGet dict_array (30.6674,-0.424547) 3498063 +dictGet dict_array (30.669,1.76539) 101 +dictGet dict_array (30.6788,4.01239) 101 +dictGet dict_array (30.6864,3.59158) 101 +dictGet dict_array (30.7049,-0.875413) 3501893 +dictGet dict_array (30.705,1.3307) 101 +dictGet dict_array (30.7063,-0.473192) 3498063 +dictGet dict_array (30.7075,-1.1958199999999999) 101 +dictGet dict_array (30.7101,-0.367562) 3498012 +dictGet dict_array (30.7203,2.98725) 101 +dictGet dict_array (30.7213,2.2745699999999998) 101 +dictGet dict_array (30.7446,-0.334144) 3498012 +dictGet dict_array (30.7468,3.82967) 101 +dictGet dict_array (30.747,-0.384779) 3498012 +dictGet dict_array (30.7681,0.904198) 3498022 +dictGet dict_array (30.7757,1.78743) 101 +dictGet dict_array (30.8021,-0.479212) 3498012 +dictGet dict_array (30.8079,-1.40869) 101 +dictGet dict_array (30.8206,-0.0608489) 3498012 +dictGet dict_array (30.8218,0.43909) 3498023 +dictGet dict_array (30.8239,0.10014) 3498012 +dictGet dict_array (30.8282,4.15409) 101 +dictGet dict_array (30.8288,-0.709528) 3501893 +dictGet dict_array (30.8326,0.156011) 3498012 +dictGet dict_array (30.8328,-1.03704) 101 +dictGet dict_array (30.839,2.15528) 101 +dictGet dict_array (30.8452,0.219377) 3498013 +dictGet dict_array (30.8463,0.0515355) 3498012 +dictGet dict_array (30.8526,2.06614) 101 +dictGet dict_array (30.8566,0.517876) 3498023 +dictGet dict_array (30.8588,-1.31738) 101 +dictGet dict_array (30.8681,0.44207) 3498013 +dictGet dict_array (30.8914,1.0072) 3498022 +dictGet dict_array (30.897,0.483425) 3498013 +dictGet dict_array (30.905,2.8731999999999998) 3501793 +dictGet dict_array (30.9051,2.21956) 101 +dictGet dict_array (30.9115,4.00663) 101 +dictGet dict_array (30.9167,-0.834462) 3501893 +dictGet dict_array (30.9252,-1.3289900000000001) 101 +dictGet dict_array (30.9314,1.85384) 101 +dictGet dict_array (30.9392,2.53236) 3501827 +dictGet dict_array (30.9569,2.82038) 3501793 +dictGet dict_array (30.9598,-0.641011) 3498012 +dictGet dict_array (30.9601,-0.254928) 3498012 +dictGet dict_array (30.9623,-1.3886) 101 +dictGet dict_array (30.9707,0.888854) 3498022 +dictGet dict_array (30.9766,2.81957) 3501793 +dictGet dict_array (30.9775,2.69273) 3501793 +dictGet dict_array (30.9821,0.587715) 3498013 +dictGet dict_array (30.9887,4.0233) 101 +dictGet dict_array (30.9914,0.259542) 3498013 +dictGet dict_array (30.9986,-1.36832) 101 +dictGet dict_array (31.008,0.628999) 3498013 +dictGet dict_array (31.0168,-1.17462) 101 +dictGet dict_array (31.0237,3.52547) 3501821 +dictGet dict_array (31.0306,3.78522) 101 +dictGet dict_array (31.0308,-0.72453) 3501893 +dictGet dict_array (31.0463,2.41997) 3501825 +dictGet dict_array (31.047,0.624184) 3498013 +dictGet dict_array (31.0569,0.0706393) 3498015 +dictGet dict_array (31.0583,1.3244099999999999) 3501926 +dictGet dict_array (31.063,3.23861) 3501793 +dictGet dict_array (31.068,0.695575) 3498022 +dictGet dict_array (31.0687,1.85675) 101 +dictGet dict_array (31.0692,0.254793) 3498014 +dictGet dict_array (31.0766,0.828128) 3498022 +dictGet dict_array (31.0833,0.0612782) 3498015 +dictGet dict_array (31.0833,2.59748) 3501793 +dictGet dict_array (31.0861,-1.3778299999999999) 101 +dictGet dict_array (31.0874,3.07258) 3501793 +dictGet dict_array (31.0882,1.4882) 3501926 +dictGet dict_array (31.0924,3.42242) 3501821 +dictGet dict_array (31.0927,2.67448) 3501793 +dictGet dict_array (31.0936,1.12292) 3498022 +dictGet dict_array (31.0952,-0.336928) 3498012 +dictGet dict_array (31.0978,3.48482) 3501826 +dictGet dict_array (31.1107,3.7513199999999998) 3501826 +dictGet dict_array (31.1156,1.19171) 3501926 +dictGet dict_array (31.1176,0.223509) 3498015 +dictGet dict_array (31.1249,0.946838) 3498022 +dictGet dict_array (31.1267,1.48983) 3501926 +dictGet dict_array (31.138,-0.289981) 3501898 +dictGet dict_array (31.1382,3.02904) 3501793 +dictGet dict_array (31.1475,2.6178) 3501793 +dictGet dict_array (31.1491,1.37873) 3501926 +dictGet dict_array (31.1525,3.72105) 3501826 +dictGet dict_array (31.1526,-1.4129800000000001) 101 +dictGet dict_array (31.1526,-0.186457) 3501898 +dictGet dict_array (31.1539,2.78789) 3501793 +dictGet dict_array (31.1548,-1.08552) 101 +dictGet dict_array (31.1567,-0.0768925) 3501898 +dictGet dict_array (31.1613,1.49617) 3501926 +dictGet dict_array (31.1653,1.03777) 3498022 +dictGet dict_array (31.1662,3.4214700000000002) 3501826 +dictGet dict_array (31.1672,-0.0813169) 3501898 +dictGet dict_array (31.177,0.440843) 3498014 +dictGet dict_array (31.1788,-0.737151) 3501893 +dictGet dict_array (31.1856,-0.144396) 3501898 +dictGet dict_array (31.1959,3.66813) 3501826 +dictGet dict_array (31.1996,-0.353983) 3501898 +dictGet dict_array (31.2019,2.86802) 3501793 +dictGet dict_array (31.2087,2.31245) 3501825 +dictGet dict_array (31.2125,3.2713200000000002) 3501793 +dictGet dict_array (31.2137,-0.108129) 3501898 +dictGet dict_array (31.216,3.9156) 101 +dictGet dict_array (31.2201,-0.202141) 3501898 +dictGet dict_array (31.2285,2.09058) 101 +dictGet dict_array (31.2502,4.01526) 101 +dictGet dict_array (31.2585,3.11524) 3501793 +dictGet dict_array (31.2645,-0.620418) 3501890 +dictGet dict_array (31.2684,2.74277) 3501793 +dictGet dict_array (31.2821,-1.12772) 101 +dictGet dict_array (31.2821,2.46769) 3501825 +dictGet dict_array (31.2887,3.91396) 101 +dictGet dict_array (31.295,1.49942) 3501926 +dictGet dict_array (31.2997,3.46122) 3501826 +dictGet dict_array (31.3017,3.3263) 3501826 +dictGet dict_array (31.3022,3.16754) 3501793 +dictGet dict_array (31.3048,0.364962) 3498014 +dictGet dict_array (31.305,3.1967) 3501793 +dictGet dict_array (31.3061,1.84303) 101 +dictGet dict_array (31.3082,-0.173851) 3501898 +dictGet dict_array (31.3315,3.90932) 101 +dictGet dict_array (31.3351,2.80164) 3501793 +dictGet dict_array (31.3388,0.168765) 3498015 +dictGet dict_array (31.339,0.25535) 3498094 +dictGet dict_array (31.3423,1.7036799999999999) 3501926 +dictGet dict_array (31.349,0.386456) 3498014 +dictGet dict_array (31.3558,-1.04336) 101 +dictGet dict_array (31.3564,0.478876) 3498014 +dictGet dict_array (31.3607,-0.0860507) 3498015 +dictGet dict_array (31.3831,3.84469) 101 +dictGet dict_array (31.3886,-0.731137) 3501890 +dictGet dict_array (31.4043,-0.348907) 5457271 +dictGet dict_array (31.4081,1.47391) 3501926 +dictGet dict_array (31.4176,-0.583645) 5457271 +dictGet dict_array (31.4177,1.36972) 3501926 +dictGet dict_array (31.4182,0.958303) 3498022 +dictGet dict_array (31.4199,3.1738) 3501793 +dictGet dict_array (31.4221,2.74876) 3501825 +dictGet dict_array (31.4301,-0.122643) 3498015 +dictGet dict_array (31.4344,1.00661) 3498022 +dictGet dict_array (31.4375,4.20304) 101 +dictGet dict_array (31.4377,0.289608) 3498094 +dictGet dict_array (31.4379,0.54744) 3498014 +dictGet dict_array (31.4459,3.94945) 101 +dictGet dict_array (31.4559,-0.345063) 5457271 +dictGet dict_array (31.464,0.726129) 3498014 +dictGet dict_array (31.4662,-0.299019) 3498015 +dictGet dict_array (31.4671,1.9605299999999999) 3501794 +dictGet dict_array (31.4673,-0.403676) 5457271 +dictGet dict_array (31.4712,-0.237941) 3498015 +dictGet dict_array (31.4816,0.120264) 3498015 +dictGet dict_array (31.4875,0.323483) 3498014 +dictGet dict_array (31.490099999999998,-0.338163) 5457271 +dictGet dict_array (31.4932,0.517674) 3498014 +dictGet dict_array (31.5112,1.9689299999999998) 3501794 +dictGet dict_array (31.5122,2.92785) 3501791 +dictGet dict_array (31.5151,0.166429) 3498094 +dictGet dict_array (31.5174,2.94802) 3501791 +dictGet dict_array (31.5182,4.18776) 101 +dictGet dict_array (31.5238,1.18793) 3498003 +dictGet dict_array (31.5271,3.07446) 3501791 +dictGet dict_array (31.5393,1.58061) 3501794 +dictGet dict_array (31.5421,3.13711) 3501791 +dictGet dict_array (31.5479,2.39897) 3497970 +dictGet dict_array (31.5519,0.99285) 3498003 +dictGet dict_array (31.5685,3.47987) 3501824 +dictGet dict_array (31.5959,0.437382) 3498014 +dictGet dict_array (31.6003,0.194376) 3498094 +dictGet dict_array (31.6026,2.15457) 3501794 +dictGet dict_array (31.606,2.45365) 3497970 +dictGet dict_array (31.6062,-0.453441) 3501890 +dictGet dict_array (31.6107,1.35247) 3497974 +dictGet dict_array (31.6155,3.85588) 101 +dictGet dict_array (31.6222,2.03326) 3501794 +dictGet dict_array (31.6231,-0.123059) 3498083 +dictGet dict_array (31.6244,1.6885599999999998) 3497974 +dictGet dict_array (31.6459,0.669716) 3498014 +dictGet dict_array (31.6563,-0.0644741) 3498083 +dictGet dict_array (31.6618,-0.551121) 3501890 +dictGet dict_array (31.6725,-0.38922) 3498085 +dictGet dict_array (31.6727,4.10336) 101 +dictGet dict_array (31.6739,4.1391) 101 +dictGet dict_array (31.6897,2.8694699999999997) 3501792 +dictGet dict_array (31.6902,3.98792) 101 +dictGet dict_array (31.6945,2.46687) 3497970 +dictGet dict_array (31.6987,-1.3796) 101 +dictGet dict_array (31.7012,2.34845) 3497970 +dictGet dict_array (31.7036,0.0228348) 3501888 +dictGet dict_array (31.7046,3.68111) 3501824 +dictGet dict_array (31.7055,2.92556) 3501792 +dictGet dict_array (31.7102,1.04532) 3498003 +dictGet dict_array (31.7149,-0.443302) 3498085 +dictGet dict_array (31.7195,2.99311) 3501791 +dictGet dict_array (31.7274,0.166719) 3498094 +dictGet dict_array (31.7565,-0.565382) 3498085 +dictGet dict_array (31.7615,0.771626) 3498014 +dictGet dict_array (31.7739,1.8970099999999999) 3497974 +dictGet dict_array (31.7848,1.2623199999999999) 3498003 +dictGet dict_array (31.7912,-0.788599) 101 +dictGet dict_array (31.8011,2.65853) 3497970 +dictGet dict_array (31.8032,-0.0590108) 3501888 +dictGet dict_array (31.8038,1.9618799999999998) 3497974 +dictGet dict_array (31.8098,-1.46851) 101 +dictGet dict_array (31.8131,3.41982) 3501791 +dictGet dict_array (31.8169,3.31059) 3501791 +dictGet dict_array (31.8202,-0.193692) 3501888 +dictGet dict_array (31.8306,1.57586) 3497974 +dictGet dict_array (31.8382,-0.787948) 101 +dictGet dict_array (31.8433,2.49692) 3497970 +dictGet dict_array (31.8436,2.41851) 3497970 +dictGet dict_array (31.8563,-1.10787) 101 +dictGet dict_array (31.8683,0.996504) 3498002 +dictGet dict_array (31.8693,-0.828142) 101 +dictGet dict_array (31.8723,1.08929) 3498003 +dictGet dict_array (31.8737,0.881127) 3498002 +dictGet dict_array (31.8881,-0.58441) 101 +dictGet dict_array (31.9011,0.121349) 3498094 +dictGet dict_array (31.9066,2.13045) 3497965 +dictGet dict_array (31.9142,1.03368) 3498002 +dictGet dict_array (31.9155,3.38363) 3501791 +dictGet dict_array (31.9168,1.3166) 3498004 +dictGet dict_array (31.9185,-1.11879) 101 +dictGet dict_array (31.9186,-0.647948) 101 +dictGet dict_array (31.9311,3.96928) 101 +dictGet dict_array (31.9335,1.47048) 3497974 +dictGet dict_array (31.9443,-1.36175) 101 +dictGet dict_array (31.9481,2.34231) 3497970 +dictGet dict_array (31.9526,1.36565) 3498004 +dictGet dict_array (31.9629,2.5208399999999997) 3497970 +dictGet dict_array (31.9765,0.975783) 3498002 +dictGet dict_array (31.9923,3.31773) 3501791 +dictGet dict_array (31.9994,0.972816) 3498002 +dictGet dict_array (32.001,3.47425) 3501791 +dictGet dict_array (32.0127,2.13874) 3497965 +dictGet dict_array (32.0244,3.2092) 3501792 +dictGet dict_array (32.029,1.18039) 3498004 +dictGet dict_array (32.0315,0.566073) 3498095 +dictGet dict_array (32.0354,1.0766499999999999) 3498004 +dictGet dict_array (32.0399,-1.11576) 101 +dictGet dict_array (32.053,2.16849) 3497965 +dictGet dict_array (32.0542,0.042328) 3498096 +dictGet dict_array (32.0576,2.47001) 3497970 +dictGet dict_array (32.061,3.7498899999999997) 101 +dictGet dict_array (32.0623,1.25134) 3498004 +dictGet dict_array (32.0626,1.9611399999999999) 3497965 +dictGet dict_array (32.0666,-0.0904247) 3498096 +dictGet dict_array (32.0681,2.28442) 3497970 +dictGet dict_array (32.0692,1.50869) 3497981 +dictGet dict_array (32.0724,4.03314) 101 +dictGet dict_array (32.0729,-0.064324) 101 +dictGet dict_array (32.079,0.293758) 3498094 +dictGet dict_array (32.0847,-1.19814) 101 +dictGet dict_array (32.0974,-0.91927) 101 +dictGet dict_array (32.0979,-0.736979) 101 +dictGet dict_array (32.106,-1.33063) 101 +dictGet dict_array (32.1189,0.246715) 3498094 +dictGet dict_array (32.1207,4.00883) 101 +dictGet dict_array (32.1396,1.12402) 3498004 +dictGet dict_array (32.1413,1.5668) 3497981 +dictGet dict_array (32.143,1.35559) 3498004 +dictGet dict_array (32.1538,1.32881) 3498004 +dictGet dict_array (32.1549,4.06552) 101 +dictGet dict_array (32.1555,-0.79275) 101 +dictGet dict_array (32.163,1.17733) 3498004 +dictGet dict_array (32.1634,2.94273) 3501792 +dictGet dict_array (32.1644,1.85666) 3497965 +dictGet dict_array (32.1745,0.435458) 3498095 +dictGet dict_array (32.1765,1.65149) 3497981 +dictGet dict_array (32.1893,2.08924) 3497965 +dictGet dict_array (32.2024,0.222191) 3498093 +dictGet dict_array (32.2107,1.34379) 3497981 +dictGet dict_array (32.2109,3.9018699999999997) 101 +dictGet dict_array (32.2123,1.85233) 3497965 +dictGet dict_array (32.2144,3.72534) 101 +dictGet dict_array (32.2218,2.5386699999999998) 3497970 +dictGet dict_array (32.2279,2.84267) 3497245 +dictGet dict_array (32.2345,3.33295) 3501792 +dictGet dict_array (32.2435,3.85283) 101 +dictGet dict_array (32.2527,-0.480608) 101 +dictGet dict_array (32.2566,-0.837882) 101 +dictGet dict_array (32.2627,2.57708) 3497970 +dictGet dict_array (32.2733,0.244931) 3498096 +dictGet dict_array (32.2761,4.05808) 101 +dictGet dict_array (32.2764,3.78472) 101 +dictGet dict_array (32.2814,-1.26011) 101 +dictGet dict_array (32.2861,3.02427) 3497245 +dictGet dict_array (32.2924,0.928609) 3498004 +dictGet dict_array (32.2963,-0.78543) 101 +dictGet dict_array (32.3039,3.21175) 3501792 +dictGet dict_array (32.3107,0.698287) 3498004 +dictGet dict_array (32.3138,0.0595677) 3498106 +dictGet dict_array (32.3339,0.707056) 3498004 +dictGet dict_array (32.3351,0.415474) 3498106 +dictGet dict_array (32.342,-0.681023) 101 +dictGet dict_array (32.3463,1.83196) 3497126 +dictGet dict_array (32.3494,2.43799) 3497114 +dictGet dict_array (32.3524,3.47049) 3501822 +dictGet dict_array (32.3531,2.33115) 3497114 +dictGet dict_array (32.3602,0.116106) 3498106 +dictGet dict_array (32.3612,1.1598) 3498004 +dictGet dict_array (32.3689,3.34847) 3501822 +dictGet dict_array (32.3695,0.734055) 3498004 +dictGet dict_array (32.3825,3.85017) 101 +dictGet dict_array (32.3835,-1.25491) 101 +dictGet dict_array (32.4018,-0.728568) 101 +dictGet dict_array (32.4044,2.96727) 3497245 +dictGet dict_array (32.4101,2.9988) 3497245 +dictGet dict_array (32.417,-1.12908) 101 +dictGet dict_array (32.4172,4.1952) 101 +dictGet dict_array (32.4239,2.49512) 3497245 +dictGet dict_array (32.4258,4.05137) 101 +dictGet dict_array (32.4264,-0.427357) 101 +dictGet dict_array (32.4274,3.59377) 3501822 +dictGet dict_array (32.4286,-1.24757) 101 +dictGet dict_array (32.4294,3.0665) 3497245 +dictGet dict_array (32.4333,-0.353347) 101 +dictGet dict_array (32.4391,3.64421) 3501822 +dictGet dict_array (32.4401,3.70635) 3501822 +dictGet dict_array (32.45,1.68918) 3497126 +dictGet dict_array (32.4507,-0.133471) 101 +dictGet dict_array (32.4592,0.976458) 3498105 +dictGet dict_array (32.4595,1.89135) 3497126 +dictGet dict_array (32.4604,0.280248) 3498106 +dictGet dict_array (32.4835,0.472731) 3498106 +dictGet dict_array (32.4855,2.01938) 3497126 +dictGet dict_array (32.4872,2.01697) 3497126 +dictGet dict_array (32.4911,0.613106) 3498105 +dictGet dict_array (32.4918,2.17834) 3497114 +dictGet dict_array (32.4947,2.34595) 3497114 +dictGet dict_array (32.5035,2.92234) 3497245 +dictGet dict_array (32.5132,-0.331206) 101 +dictGet dict_array (32.5156,-0.412604) 3501887 +dictGet dict_array (32.5158,2.9067499999999997) 3497245 +dictGet dict_array (32.5249,2.44519) 3497114 +dictGet dict_array (32.5293,-0.790952) 101 +dictGet dict_array (32.5319,3.96854) 101 +dictGet dict_array (32.5518,3.6093) 3501822 +dictGet dict_array (32.5541,3.5225400000000002) 3501822 +dictGet dict_array (32.5569,0.816123) 3498105 +dictGet dict_array (32.5646,1.9775) 3497126 +dictGet dict_array (32.5733,3.81271) 101 +dictGet dict_array (32.5767,0.948327) 3498105 +dictGet dict_array (32.5971,1.76179) 3497126 +dictGet dict_array (32.6035,-0.716157) 101 +dictGet dict_array (32.6087,4.21614) 101 +dictGet dict_array (32.6171,0.024481) 101 +dictGet dict_array (32.6189,-0.775391) 101 +dictGet dict_array (32.6198,2.92081) 3497167 +dictGet dict_array (32.621,-0.970784) 101 +dictGet dict_array (32.6266,0.650009) 3498105 +dictGet dict_array (32.6315,2.15144) 3497126 +dictGet dict_array (32.6385,-0.436803) 101 +dictGet dict_array (32.6449,-0.191292) 101 +dictGet dict_array (32.6535,2.10385) 3497126 +dictGet dict_array (32.6592,3.49973) 3501822 +dictGet dict_array (32.6598,2.5980600000000003) 3497114 +dictGet dict_array (32.6612,2.95681) 3497167 +dictGet dict_array (32.6636,-0.57235) 101 +dictGet dict_array (32.669,-0.382702) 101 +dictGet dict_array (32.6752,1.30748) 3497981 +dictGet dict_array (32.6811,2.9559800000000003) 3497167 +dictGet dict_array (32.6821,0.57336) 3498105 +dictGet dict_array (32.6828,3.91304) 101 +dictGet dict_array (32.6979,3.96868) 101 +dictGet dict_array (32.6983,3.15784) 3497167 +dictGet dict_array (32.7122,0.794293) 3498105 +dictGet dict_array (32.7131,-0.847256) 101 +dictGet dict_array (32.7219,0.883461) 3498105 +dictGet dict_array (32.7228,1.78808) 3497126 +dictGet dict_array (32.7273,-0.206908) 101 +dictGet dict_array (32.7292,0.259331) 3501889 +dictGet dict_array (32.7304,-1.38317) 101 +dictGet dict_array (32.7353,1.01601) 3498105 +dictGet dict_array (32.7354,4.17574) 101 +dictGet dict_array (32.7357,-0.190194) 101 +dictGet dict_array (32.7465,-1.37598) 101 +dictGet dict_array (32.7494,-0.275675) 101 +dictGet dict_array (32.7514,0.128951) 3501889 +dictGet dict_array (32.753,3.44207) 3501822 +dictGet dict_array (32.7686,2.11713) 3497126 +dictGet dict_array (32.7694,1.47159) 3497388 +dictGet dict_array (32.7768,0.0401042) 101 +dictGet dict_array (32.781,-1.34283) 101 +dictGet dict_array (32.7814,1.73876) 3497388 +dictGet dict_array (32.7856,-1.06363) 101 +dictGet dict_array (32.792699999999996,-1.1255600000000001) 101 +dictGet dict_array (32.7941,-0.645447) 101 +dictGet dict_array (32.7946,1.48889) 3497388 +dictGet dict_array (32.797,0.791753) 3501889 +dictGet dict_array (32.7982,-0.537798) 101 +dictGet dict_array (32.8091,2.3611) 3490438 +dictGet dict_array (32.81,1.7130800000000002) 3497388 +dictGet dict_array (32.8174,-0.288322) 101 +dictGet dict_array (32.823,1.6546699999999999) 3497388 +dictGet dict_array (32.8233,1.62108) 3497388 +dictGet dict_array (32.8428,-0.400045) 101 +dictGet dict_array (32.8479,2.13598) 3490438 +dictGet dict_array (32.8524,0.199902) 3501889 +dictGet dict_array (32.8543,3.23553) 3501820 +dictGet dict_array (32.8562,1.31371) 3498117 +dictGet dict_array (32.87,1.44256) 3498117 +dictGet dict_array (32.8789,2.38192) 3490438 +dictGet dict_array (32.8812,2.20734) 3497128 +dictGet dict_array (32.8815,-0.54427) 101 +dictGet dict_array (32.8853,2.4859) 3497128 +dictGet dict_array (32.8909,0.513964) 3501889 +dictGet dict_array (32.9035,2.38999) 3490438 +dictGet dict_array (32.9097,2.48131) 3497128 +dictGet dict_array (32.928,-0.943269) 101 +dictGet dict_array (32.9322,1.13165) 3498104 +dictGet dict_array (32.9348,1.22606) 3498117 +dictGet dict_array (32.9417,3.77998) 3501822 +dictGet dict_array (32.9428,3.11936) 3497167 +dictGet dict_array (32.9482,1.18092) 3498118 +dictGet dict_array (32.9506,0.0609364) 101 +dictGet dict_array (32.953,-0.828308) 101 +dictGet dict_array (32.9593,3.5209099999999998) 3501822 +dictGet dict_array (32.9617,2.07711) 3497128 +dictGet dict_array (32.966,0.693749) 3498104 +dictGet dict_array (32.9668,-0.716432) 101 +dictGet dict_array (32.9702,1.98555) 3497127 +dictGet dict_array (32.9782,1.73819) 3497388 +dictGet dict_array (32.9805,3.71151) 3501822 +dictGet dict_array (32.9821,2.97225) 3497167 +dictGet dict_array (32.995,-0.830301) 101 +dictGet dict_array (33.0234,0.770848) 3498104 +dictGet dict_array (33.0312,-0.340964) 101 +dictGet dict_array (33.0366,-0.756795) 101 +dictGet dict_array (33.0438,0.812871) 3498118 +dictGet dict_array (33.0455,1.84843) 3497127 +dictGet dict_array (33.0498,0.0913292) 101 +dictGet dict_array (33.0506,1.53739) 3497364 +dictGet dict_array (33.0554,2.4265) 3497363 +dictGet dict_array (33.0741,3.61332) 3501822 +dictGet dict_array (33.0765,-0.179985) 101 +dictGet dict_array (33.087,1.46465) 3497399 +dictGet dict_array (33.0906,-0.620383) 101 +dictGet dict_array (33.1047,-1.28027) 101 +dictGet dict_array (33.1072,1.96303) 3497127 +dictGet dict_array (33.1081,-0.897874) 101 +dictGet dict_array (33.1122,1.8950200000000001) 3497127 +dictGet dict_array (33.1237,2.63993) 3497165 +dictGet dict_array (33.1238,0.753963) 3498118 +dictGet dict_array (33.1257,0.495668) 3498102 +dictGet dict_array (33.1258,1.78341) 3497364 +dictGet dict_array (33.127,2.59646) 3497166 +dictGet dict_array (33.1324,-1.23742) 101 +dictGet dict_array (33.1359,3.83491) 101 +dictGet dict_array (33.1628,-0.379588) 101 +dictGet dict_array (33.1679,1.25601) 3498117 +dictGet dict_array (33.1688,-1.35553) 101 +dictGet dict_array (33.181,2.10943) 3497363 +dictGet dict_array (33.1871,2.81171) 3497165 +dictGet dict_array (33.1877,0.771297) 3498118 +dictGet dict_array (33.1883,-0.204797) 101 +dictGet dict_array (33.1886,3.27998) 3501820 +dictGet dict_array (33.1955,0.708907) 3498118 +dictGet dict_array (33.2044,-0.769275) 101 +dictGet dict_array (33.2182,3.36103) 3501820 +dictGet dict_array (33.2192,3.43586) 3501822 +dictGet dict_array (33.2322,-0.916753) 101 +dictGet dict_array (33.2359,-0.81321) 101 +dictGet dict_array (33.238,0.635072) 3498111 +dictGet dict_array (33.2398,3.02588) 3497165 +dictGet dict_array (33.2469,2.35698) 3497363 +dictGet dict_array (33.247,2.3327) 3497363 +dictGet dict_array (33.2579,2.8027100000000003) 3497165 +dictGet dict_array (33.2607,0.321082) 101 +dictGet dict_array (33.2653,0.243336) 101 +dictGet dict_array (33.2758,0.831836) 3498118 +dictGet dict_array (33.2771,0.886536) 3498118 +dictGet dict_array (33.2914,1.16026) 3498117 +dictGet dict_array (33.2914,1.38882) 3497399 +dictGet dict_array (33.2982,-1.16604) 101 +dictGet dict_array (33.2985,0.842556) 3498112 +dictGet dict_array (33.3005,2.8338900000000002) 3497165 +dictGet dict_array (33.305,0.0969475) 101 +dictGet dict_array (33.3072,3.82163) 101 +dictGet dict_array (33.312,3.41475) 3501820 +dictGet dict_array (33.3129,2.46048) 3497166 +dictGet dict_array (33.3134,3.46863) 3501820 +dictGet dict_array (33.3203,2.33139) 3497166 +dictGet dict_array (33.324,0.433701) 101 +dictGet dict_array (33.3338,2.44705) 3497166 +dictGet dict_array (33.337,4.06475) 101 +dictGet dict_array (33.3469,1.08172) 3498126 +dictGet dict_array (33.3538,0.717896) 3498112 +dictGet dict_array (33.3618,1.37899) 3497399 +dictGet dict_array (33.3698,0.547744) 3501862 +dictGet dict_array (33.3705,0.957619) 3498112 +dictGet dict_array (33.3821,3.07258) 3497165 +dictGet dict_array (33.3881,3.0626) 3497165 +dictGet dict_array (33.393,-0.816186) 101 +dictGet dict_array (33.3945,0.869508) 3498110 +dictGet dict_array (33.4001,1.24186) 3498117 +dictGet dict_array (33.4008,2.34911) 3497166 +dictGet dict_array (33.4166,-1.2808899999999999) 101 +dictGet dict_array (33.4167,3.0655) 3497165 +dictGet dict_array (33.4204,2.81887) 3497165 +dictGet dict_array (33.4211,1.71128) 3497400 +dictGet dict_array (33.4237,2.91761) 3497165 +dictGet dict_array (33.4266,1.5955599999999999) 3497399 +dictGet dict_array (33.4353,-0.391392) 101 +dictGet dict_array (33.4362,-0.134658) 101 +dictGet dict_array (33.4386,0.15396) 101 +dictGet dict_array (33.4421,-0.50712) 101 +dictGet dict_array (33.452,0.915829) 3498126 +dictGet dict_array (33.463,-0.0882717) 101 +dictGet dict_array (33.464,-1.00949) 101 +dictGet dict_array (33.4692,0.954092) 3498126 +dictGet dict_array (33.4716,1.9538799999999998) 3497400 +dictGet dict_array (33.4756,1.85836) 3497400 +dictGet dict_array (33.4859,4.0751) 101 +dictGet dict_array (33.4899,3.54193) 3501820 +dictGet dict_array (33.4935,3.49794) 3501820 +dictGet dict_array (33.494,-0.983356) 101 +dictGet dict_array (33.4955,-1.28128) 101 +dictGet dict_array (33.4965,-0.278687) 101 +dictGet dict_array (33.4991,0.647491) 3498110 +dictGet dict_array (33.5076,2.2272) 3497424 +dictGet dict_array (33.5079,-0.498199) 101 +dictGet dict_array (33.5157,0.535034) 3501862 +dictGet dict_array (33.5171,2.49677) 3497166 +dictGet dict_array (33.5255,2.4447200000000002) 3497166 +dictGet dict_array (33.526,4.01194) 101 +dictGet dict_array (33.5288,0.789434) 3498110 +dictGet dict_array (33.5356,-1.17671) 101 +dictGet dict_array (33.5402,1.49152) 3497399 +dictGet dict_array (33.5418,3.45757) 3501820 +dictGet dict_array (33.5428,1.90712) 3497400 +dictGet dict_array (33.5556,-0.55741) 101 +dictGet dict_array (33.5564,0.876858) 3498128 +dictGet dict_array (33.5567,-0.10208) 101 +dictGet dict_array (33.5645,-0.124824) 101 +dictGet dict_array (33.5663,3.4872) 3501820 +dictGet dict_array (33.5716,-0.0107611) 101 +dictGet dict_array (33.578,3.55714) 3501820 +dictGet dict_array (33.5826,-0.49076) 101 +dictGet dict_array (33.5909,0.773737) 3498110 +dictGet dict_array (33.5958,2.9619999999999997) 3497425 +dictGet dict_array (33.6193,-0.919755) 101 +dictGet dict_array (33.6313,0.652132) 3498110 +dictGet dict_array (33.632,0.823351) 3498128 +dictGet dict_array (33.66,2.18998) 3497424 +dictGet dict_array (33.6621,0.535395) 3498135 +dictGet dict_array (33.6726,3.19367) 3497438 +dictGet dict_array (33.6912,1.74522) 3497400 +dictGet dict_array (33.705,0.706397) 3498135 +dictGet dict_array (33.7076,0.7622) 3498128 +dictGet dict_array (33.7112,1.70187) 3497400 +dictGet dict_array (33.7246,-1.14837) 101 +dictGet dict_array (33.7326,2.62413) 3497425 +dictGet dict_array (33.7332,2.82137) 3497425 +dictGet dict_array (33.7434,0.394672) 3498135 +dictGet dict_array (33.7443,1.54557) 3497398 +dictGet dict_array (33.7506,1.57317) 3497398 +dictGet dict_array (33.7526,1.8578999999999999) 3497424 +dictGet dict_array (33.766,4.15013) 101 +dictGet dict_array (33.7834,2.41789) 3497439 +dictGet dict_array (33.7864,0.230935) 101 +dictGet dict_array (33.7965,3.05709) 3497438 +dictGet dict_array (33.7998,3.32881) 3497438 +dictGet dict_array (33.8003,2.97338) 3497425 +dictGet dict_array (33.8007,-1.08962) 101 +dictGet dict_array (33.8022,-0.139488) 101 +dictGet dict_array (33.8065,2.70857) 3497425 +dictGet dict_array (33.8169,-0.607788) 101 +dictGet dict_array (33.8203,0.108512) 3501863 +dictGet dict_array (33.8231,-1.03449) 101 +dictGet dict_array (33.8312,3.49458) 3501829 +dictGet dict_array (33.8342,0.297518) 3501863 +dictGet dict_array (33.8352,0.165872) 101 +dictGet dict_array (33.8354,1.87277) 3497424 +dictGet dict_array (33.8371,1.60103) 3497398 +dictGet dict_array (33.8387,1.9968) 3497424 +dictGet dict_array (33.8403,3.5805) 3501829 +dictGet dict_array (33.8414,-0.703067) 101 +dictGet dict_array (33.844,-0.179472) 101 +dictGet dict_array (33.8468,3.40137) 3501829 +dictGet dict_array (33.8509,4.15334) 101 +dictGet dict_array (33.8539,2.38339) 3497439 +dictGet dict_array (33.858,-1.3122500000000001) 101 +dictGet dict_array (33.859,3.72626) 3501829 +dictGet dict_array (33.8616,2.24433) 3497424 +dictGet dict_array (33.8621,3.01035) 3497438 +dictGet dict_array (33.8623,1.17559) 3498129 +dictGet dict_array (33.8682,2.706) 3497425 +dictGet dict_array (33.8684,0.189231) 3501863 +dictGet dict_array (33.872,1.93574) 3497424 +dictGet dict_array (33.8844,3.80404) 3501829 +dictGet dict_array (33.8888,0.594884) 3498135 +dictGet dict_array (33.8946,2.74161) 3497438 +dictGet dict_array (33.9023,0.6239) 3498135 +dictGet dict_array (33.9057,0.873222) 3498136 +dictGet dict_array (33.9157,-1.26607) 101 +dictGet dict_array (33.92,2.06848) 3497397 +dictGet dict_array (33.9298,-0.00526229) 101 +dictGet dict_array (33.932,3.07063) 3497438 +dictGet dict_array (33.9322,0.629385) 3501864 +dictGet dict_array (33.9367,-1.41955) 101 +dictGet dict_array (33.937,1.42532) 3498173 +dictGet dict_array (33.9375,1.1467100000000001) 3498159 +dictGet dict_array (33.9434,-1.05739) 101 +dictGet dict_array (33.9477,3.34809) 3501829 +dictGet dict_array (33.95,2.21715) 3497397 +dictGet dict_array (33.955799999999996,0.305176) 3501859 +dictGet dict_array (33.9686,-0.28273) 101 +dictGet dict_array (33.9703,4.1255) 3501829 +dictGet dict_array (33.9707,3.08199) 3497438 +dictGet dict_array (33.9754,1.06203) 3498159 +dictGet dict_array (33.9757,3.72468) 3501829 +dictGet dict_array (33.9775,-0.0440599) 101 +dictGet dict_array (33.9777,-0.251484) 101 +dictGet dict_array (33.9789,-0.339374) 101 +dictGet dict_array (33.9849,2.54515) 3497425 +dictGet dict_array (33.9885,-0.318557) 101 +dictGet dict_array (33.9977,1.07175) 3498159 +dictGet dict_array (33.9984,-0.700517) 101 +dictGet dict_array (34.0149,3.53338) 3501829 +dictGet dict_array (34.0173,3.39155) 3501829 +dictGet dict_array (34.0317,3.9579) 3501829 +dictGet dict_array (34.0369,3.83612) 3501829 +dictGet dict_array (34.043,-0.0887221) 101 +dictGet dict_array (34.0487,1.14252) 3498159 +dictGet dict_array (34.052,1.74832) 3497397 +dictGet dict_array (34.0711,-0.898071) 101 +dictGet dict_array (34.0747,1.55057) 3498173 +dictGet dict_array (34.0803,3.16763) 3497438 +dictGet dict_array (34.0872,3.75555) 3501829 +dictGet dict_array (34.0965,1.62038) 3498173 +dictGet dict_array (34.0977,-0.412691) 101 +dictGet dict_array (34.0986,0.0294206) 101 +dictGet dict_array (34.1072,3.15823) 3497438 +dictGet dict_array (34.1092,3.09599) 3497438 +dictGet dict_array (34.1206,1.04637) 3498160 +dictGet dict_array (34.1209,3.13826) 3497438 +dictGet dict_array (34.1265,3.95881) 3501829 +dictGet dict_array (34.1286,-0.539319) 101 +dictGet dict_array (34.1358,3.67451) 3501829 +dictGet dict_array (34.1428,0.136115) 101 +dictGet dict_array (34.157,1.73522) 3497397 +dictGet dict_array (34.1581,1.48001) 3498172 +dictGet dict_array (34.1682,3.42373) 3501829 +dictGet dict_array (34.1683,-1.26511) 101 +dictGet dict_array (34.1684,4.20007) 101 +dictGet dict_array (34.1854,3.32089) 3501829 +dictGet dict_array (34.2022,0.749536) 3501864 +dictGet dict_array (34.2044,3.04865) 3497438 +dictGet dict_array (34.22,-0.500055) 101 +dictGet dict_array (34.2249,0.743775) 3501864 +dictGet dict_array (34.2254,1.34702) 3498172 +dictGet dict_array (34.2355,-0.898843) 101 +dictGet dict_array (34.2394,2.0203699999999998) 3497439 +dictGet dict_array (34.2466,1.83785) 3498251 +dictGet dict_array (34.247,4.09563) 101 +dictGet dict_array (34.2508,2.61312) 3497439 +dictGet dict_array (34.2517,1.69642) 3498251 +dictGet dict_array (34.2564,4.13033) 101 +dictGet dict_array (34.2574,4.18928) 101 +dictGet dict_array (34.2614,-0.478719) 101 +dictGet dict_array (34.2625,2.38088) 3497439 +dictGet dict_array (34.2666,3.1503) 3501829 +dictGet dict_array (34.271,4.02223) 101 +dictGet dict_array (34.2727,0.514755) 101 +dictGet dict_array (34.278,1.98929) 3497439 +dictGet dict_array (34.2798,-0.199208) 101 +dictGet dict_array (34.2804,2.05184) 3497439 +dictGet dict_array (34.2945,-1.11051) 101 +dictGet dict_array (34.3168,-0.0829721) 101 +dictGet dict_array (34.3345,3.4358) 3501829 +dictGet dict_array (34.3377,1.13527) 3498162 +dictGet dict_array (34.3383,1.27891) 3498161 +dictGet dict_array (34.3391,1.47945) 3498161 +dictGet dict_array (34.3441,0.627014) 101 +dictGet dict_array (34.347,2.4853) 3497439 +dictGet dict_array (34.3514,2.16247) 3497439 +dictGet dict_array (34.3627,2.64533) 3497439 +dictGet dict_array (34.3682,-0.227501) 101 +dictGet dict_array (34.3756,4.21248) 101 +dictGet dict_array (34.379,3.96604) 101 +dictGet dict_array (34.3827,1.7518) 3498251 +dictGet dict_array (34.3912,2.8834) 3501830 +dictGet dict_array (34.3919,0.668829) 101 +dictGet dict_array (34.3949,2.00338) 3497439 +dictGet dict_array (34.3987,0.557268) 101 +dictGet dict_array (34.4111,0.768558) 101 +dictGet dict_array (34.4119,2.8742) 3501830 +dictGet dict_array (34.416,3.50841) 3501829 +dictGet dict_array (34.4212,1.24916) 3498161 +dictGet dict_array (34.4251,0.457029) 101 +dictGet dict_array (34.4274,-0.902559) 101 +dictGet dict_array (34.4325,4.03159) 101 +dictGet dict_array (34.438,1.63994) 3498251 +dictGet dict_array (34.4403,-0.177594) 101 +dictGet dict_array (34.4421,0.726712) 101 +dictGet dict_array (34.4517,2.98611) 3501830 +dictGet dict_array (34.4658,-1.312) 101 +dictGet dict_array (34.4732,-0.0681338) 101 +dictGet dict_array (34.4752,2.81646) 3501830 +dictGet dict_array (34.4914,2.3858) 3497439 +dictGet dict_array (34.4923,0.855231) 101 +dictGet dict_array (34.5235,1.78468) 3498251 +dictGet dict_array (34.5305,4.10608) 101 +dictGet dict_array (34.5389,0.621937) 101 +dictGet dict_array (34.5406,3.17145) 101 +dictGet dict_array (34.5434,-0.56306) 101 +dictGet dict_array (34.5449,3.13311) 3501829 +dictGet dict_array (34.5491,2.31572) 3497439 +dictGet dict_array (34.5539,2.94028) 3501830 +dictGet dict_array (34.5546,-0.208825) 101 +dictGet dict_array (34.5549,3.78486) 101 +dictGet dict_array (34.5676,0.307148) 101 +dictGet dict_array (34.5743,1.5217399999999999) 3501838 +dictGet dict_array (34.5775,3.48046) 101 +dictGet dict_array (34.5815,2.5243700000000002) 3501830 +dictGet dict_array (34.5841,4.21191) 101 +dictGet dict_array (34.5887,2.65083) 3501830 +dictGet dict_array (34.5937,3.2143) 101 +dictGet dict_array (34.6013,-1.0612) 101 +dictGet dict_array (34.6089,1.36066) 3501838 +dictGet dict_array (34.6103,3.40227) 101 +dictGet dict_array (34.6128,1.92276) 3498251 +dictGet dict_array (34.6175,2.43627) 3498251 +dictGet dict_array (34.6209,3.43776) 101 +dictGet dict_array (34.6234,2.60237) 3501830 +dictGet dict_array (34.6275,3.52479) 101 +dictGet dict_array (34.635,0.568558) 101 +dictGet dict_array (34.6373,2.37692) 3498251 +dictGet dict_array (34.6375,3.52234) 101 +dictGet dict_array (34.6426,2.12397) 3498251 +dictGet dict_array (34.6513,2.80915) 3501830 +dictGet dict_array (34.6632,2.30039) 3498251 +dictGet dict_array (34.6691,1.86582) 3498251 +dictGet dict_array (34.6739,0.15342) 101 +dictGet dict_array (34.6825,0.0499679) 101 +dictGet dict_array (34.6893,0.454326) 101 +dictGet dict_array (34.6957,-0.358598) 101 +dictGet dict_array (34.6986,0.562679) 101 +dictGet dict_array (34.712,1.12114) 101 +dictGet dict_array (34.7126,-0.0057301) 101 +dictGet dict_array (34.7137,0.0248501) 101 +dictGet dict_array (34.7162,1.15623) 101 +dictGet dict_array (34.7258,3.95142) 101 +dictGet dict_array (34.7347,3.5232099999999997) 101 +dictGet dict_array (34.7363,2.23374) 3501830 +dictGet dict_array (34.7375,0.397841) 101 +dictGet dict_array (34.7423,3.09198) 101 +dictGet dict_array (34.7452,3.09029) 101 +dictGet dict_array (34.7539,-1.06943) 101 +dictGet dict_array (34.7733,-0.00912717) 101 +dictGet dict_array (34.774,2.71088) 3501830 +dictGet dict_array (34.7771,1.46009) 3501835 +dictGet dict_array (34.7782,-1.28308) 101 +dictGet dict_array (34.7924,3.63564) 101 +dictGet dict_array (34.7939,-0.416676) 101 +dictGet dict_array (34.7964,-0.401773) 101 +dictGet dict_array (34.7974,0.0286873) 101 +dictGet dict_array (34.7975,3.05965) 101 +dictGet dict_array (34.8037,3.07263) 101 +dictGet dict_array (34.8254,-0.390284) 101 +dictGet dict_array (34.828,1.91869) 3498251 +dictGet dict_array (34.8289,3.71058) 101 +dictGet dict_array (34.8403,2.14606) 3501835 +dictGet dict_array (34.8437,2.20617) 3501830 +dictGet dict_array (34.8469,2.38435) 3501830 +dictGet dict_array (34.86,1.45705) 101 +dictGet dict_array (34.8612,0.914248) 101 +dictGet dict_array (34.8663,3.4215400000000002) 101 +dictGet dict_array (34.8724,-0.375144) 101 +dictGet dict_array (34.8795,3.29317) 101 +dictGet dict_array (34.8823,1.21988) 101 +dictGet dict_array (34.8834,1.07657) 101 +dictGet dict_array (34.8837,0.157648) 101 +dictGet dict_array (34.8871,-0.9755) 101 +dictGet dict_array (34.8871,1.8943699999999999) 3501835 +dictGet dict_array (34.889,3.36756) 101 +dictGet dict_array (34.8907,1.24874) 101 +dictGet dict_array (34.8965,3.13508) 101 +dictGet dict_array (34.9042,2.62092) 101 +dictGet dict_array (34.9055,-0.0448967) 101 +dictGet dict_array (34.9122,0.110576) 101 +dictGet dict_array (34.9228,3.60183) 101 +dictGet dict_array (34.9237,1.21715) 101 +dictGet dict_array (34.9296,1.70459) 3501835 +dictGet dict_array (34.941,-1.14663) 101 +dictGet dict_array (34.9448,1.18923) 101 +dictGet dict_array (34.9462,3.81678) 101 +dictGet dict_array (34.9466,0.593463) 101 +dictGet dict_array (34.9485,0.150307) 101 +dictGet dict_array (34.9542,0.487238) 101 +dictGet dict_array (34.9559,2.03473) 3501835 +dictGet dict_array (34.9671,-0.960225) 101 +dictGet dict_array (34.9711,2.63444) 101 +dictGet dict_array (34.9892,0.354775) 101 +dictGet dict_array (34.9907,1.40724) 101 +dictGet dict_array (34.9916,-0.00173097) 101 +dictGet dict_array (34.9919,2.06167) 101 diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh index f6880ae5009..c9cd151a2d9 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-debug, no-parallel +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -12,20 +12,17 @@ declare -a SearchTypes=("POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${CURDIR}" $CLICKHOUSE_CLIENT -n --query=" -DROP DATABASE IF EXISTS test_01037; -CREATE DATABASE test_01037; -DROP TABLE IF EXISTS test_01037.points; -CREATE TABLE test_01037.points (x Float64, y Float64) ENGINE = Memory; +CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.points FORMAT TSV" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" rm "${CURDIR}"/01037_point_data $CLICKHOUSE_CLIENT -n --query=" -DROP TABLE IF EXISTS test_01037.polygons_array; +DROP TABLE IF EXISTS polygons_array; -CREATE TABLE test_01037.polygons_array +CREATE TABLE polygons_array ( key Array(Array(Array(Array(Float64)))), name String, @@ -34,7 +31,7 @@ CREATE TABLE test_01037.polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" rm "${CURDIR}"/01037_polygon_data @@ -43,27 +40,23 @@ do outputFile="${TMP_DIR}/results${type}.out" $CLICKHOUSE_CLIENT -n --query=" - DROP DICTIONARY IF EXISTS test_01037.dict_array; + DROP DICTIONARY IF EXISTS dict_array; - CREATE DICTIONARY test_01037.dict_array + CREATE DICTIONARY dict_array ( key Array(Array(Array(Array(Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; + select 'dictGet', 'dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; " > "$outputFile" diff -q "${CURDIR}/01037_polygon_dicts_correctness_fast.ans" "$outputFile" done -$CLICKHOUSE_CLIENT -n --query=" -DROP TABLE test_01037.points; -DROP DATABASE test_01037; -" diff --git a/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh b/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh index 1e754dce786..66732205f95 100755 --- a/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh +++ b/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh @@ -5,13 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS db_01038" - -$CLICKHOUSE_CLIENT --query "CREATE DATABASE db_01038" - $CLICKHOUSE_CLIENT --query " -CREATE TABLE db_01038.table_for_dict +CREATE TABLE ${CLICKHOUSE_DATABASE}.table_for_dict ( key_column UInt64, value Float64 @@ -19,34 +15,34 @@ CREATE TABLE db_01038.table_for_dict ENGINE = MergeTree() ORDER BY key_column" -$CLICKHOUSE_CLIENT --query "INSERT INTO db_01038.table_for_dict VALUES (1, 1.1)" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table_for_dict VALUES (1, 1.1)" $CLICKHOUSE_CLIENT --query " -CREATE DICTIONARY db_01038.dict_with_zero_min_lifetime +CREATE DICTIONARY ${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime ( key_column UInt64, value Float64 DEFAULT 77.77 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'db_01038')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB '${CLICKHOUSE_DATABASE}')) LIFETIME(1) LAYOUT(FLAT())" -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(1))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(1))" -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))" -$CLICKHOUSE_CLIENT --query "INSERT INTO db_01038.table_for_dict VALUES (2, 2.2)" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table_for_dict VALUES (2, 2.2)" function check() { - query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))") + query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))") while [ "$query_result" != "2.2" ] do - query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))") + query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))") done } @@ -55,8 +51,6 @@ export -f check; timeout 10 bash -c check -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(1))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(1))" -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))" - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS db_01038" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))" diff --git a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh index 6856f952a47..d558fbf465e 100755 --- a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh +++ b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh @@ -1,17 +1,12 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01041_01040" - -$CLICKHOUSE_CLIENT --query "CREATE DATABASE dictdb_01041_01040" - $CLICKHOUSE_CLIENT --query " -CREATE TABLE dictdb_01041_01040.dict_invalidate +CREATE TABLE dict_invalidate ENGINE = Memory AS SELECT 122 as dummy, @@ -20,31 +15,31 @@ FROM system.one" $CLICKHOUSE_CLIENT --query " -CREATE DICTIONARY dictdb_01041_01040.invalidate +CREATE DICTIONARY invalidate ( dummy UInt64, two UInt8 EXPRESSION dummy ) PRIMARY KEY dummy -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_invalidate' DB 'dictdb_01041_01040' INVALIDATE_QUERY 'select max(last_time) from dictdb_01041_01040.dict_invalidate')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_invalidate' DB currentDatabase() INVALIDATE_QUERY 'select max(last_time) from dict_invalidate')) LIFETIME(MIN 0 MAX 1) LAYOUT(FLAT())" -$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb_01041_01040.invalidate', 'two', toUInt64(122))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('invalidate', 'two', toUInt64(122))" # No exception happened -$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" -$CLICKHOUSE_CLIENT --check_table_dependencies=0 --query "DROP TABLE dictdb_01041_01040.dict_invalidate" +$CLICKHOUSE_CLIENT --check_table_dependencies=0 --query "DROP TABLE dict_invalidate" function check_exception_detected() { - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) while [ -z "$query_result" ] do - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) sleep 0.1 done } @@ -53,10 +48,10 @@ function check_exception_detected() export -f check_exception_detected; timeout 30 bash -c check_exception_detected 2> /dev/null -$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 | grep -Eo "dictdb_01041_01040.dict_invalidate.*UNKNOWN_TABLE" | wc -l +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1 | grep -Eo "dict_invalidate.*UNKNOWN_TABLE" | wc -l $CLICKHOUSE_CLIENT --query " -CREATE TABLE dictdb_01041_01040.dict_invalidate +CREATE TABLE dict_invalidate ENGINE = Memory AS SELECT 133 as dummy, @@ -65,11 +60,11 @@ FROM system.one" function check_exception_fixed() { - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) while [ "$query_result" ] do - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) sleep 0.1 done } @@ -78,7 +73,5 @@ export -f check_exception_fixed; # it may take a while until dictionary reloads timeout 60 bash -c check_exception_fixed 2> /dev/null -$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 -$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb_01041_01040.invalidate', 'two', toUInt64(133))" - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01041_01040" +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1 +$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('invalidate', 'two', toUInt64(133))" diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh index 9d34470c38d..2b075566ac3 100755 --- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh +++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -8,41 +7,37 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail # NOTE: dictionaries TTLs works with server timezone, so session_timeout cannot be used -$CLICKHOUSE_CLIENT --session_timezone '' --multiquery <<'EOF' -DROP DATABASE IF EXISTS dictdb_01042; -CREATE DATABASE dictdb_01042; -CREATE TABLE dictdb_01042.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO dictdb_01042.table VALUES (12, 102, now()); +$CLICKHOUSE_CLIENT --session_timezone '' --multiquery < ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(12))" +$CLICKHOUSE_CLIENT --query "SELECT '12 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(12))" -$CLICKHOUSE_CLIENT --query "INSERT INTO dictdb_01042.table VALUES (13, 103, now())" -$CLICKHOUSE_CLIENT --query "INSERT INTO dictdb_01042.table VALUES (14, 104, now() - INTERVAL 1 DAY)" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table VALUES (13, 103, now())" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table VALUES (14, 104, now() - INTERVAL 1 DAY)" -while [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))")" = -1 ] +while [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))")" = -1 ] do sleep 0.5 done -$CLICKHOUSE_CLIENT --query "SELECT '13 -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))" -$CLICKHOUSE_CLIENT --query "SELECT '14 -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(14))" +$CLICKHOUSE_CLIENT --query "SELECT '13 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))" +$CLICKHOUSE_CLIENT --query "SELECT '14 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(14))" -$CLICKHOUSE_CLIENT --query "SYSTEM RELOAD DICTIONARY 'dictdb_01042.dict'" +$CLICKHOUSE_CLIENT --query "SYSTEM RELOAD DICTIONARY '${CLICKHOUSE_DATABASE}.dict'" -$CLICKHOUSE_CLIENT --query "SELECT '12(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(12))" -$CLICKHOUSE_CLIENT --query "SELECT '13(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))" -$CLICKHOUSE_CLIENT --query "SELECT '14(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(14))" - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01042" +$CLICKHOUSE_CLIENT --query "SELECT '12(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(12))" +$CLICKHOUSE_CLIENT --query "SELECT '13(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))" +$CLICKHOUSE_CLIENT --query "SELECT '14(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(14))" diff --git a/tests/queries/0_stateless/01053_ssd_dictionary.sh b/tests/queries/0_stateless/01053_ssd_dictionary.sh index b49144c9b1a..00e5719a9a9 100755 --- a/tests/queries/0_stateless/01053_ssd_dictionary.sh +++ b/tests/queries/0_stateless/01053_ssd_dictionary.sh @@ -6,8 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -n --query=" DROP DATABASE IF EXISTS 01053_db; diff --git a/tests/queries/0_stateless/01055_compact_parts_1.sql b/tests/queries/0_stateless/01055_compact_parts_1.sql index ff5ab722e0f..72048c59a41 100644 --- a/tests/queries/0_stateless/01055_compact_parts_1.sql +++ b/tests/queries/0_stateless/01055_compact_parts_1.sql @@ -1,8 +1,3 @@ --- Tags: no-parallel - -drop table if exists mt_compact; -drop table if exists mt_compact_2; - create table mt_compact (a Int, s String) engine = MergeTree order by a partition by a settings index_granularity_bytes = 0; alter table mt_compact modify setting min_rows_for_wide_part = 1000; -- { serverError NOT_IMPLEMENTED } @@ -25,5 +20,3 @@ alter table mt_compact modify setting parts_to_delay_insert = 300; alter table mt_compact modify setting min_rows_for_wide_part = 0; show create table mt_compact; - -drop table mt_compact diff --git a/tests/queries/0_stateless/01060_avro.sh b/tests/queries/0_stateless/01060_avro.sh index 3c70927db25..6ed26c8565f 100755 --- a/tests/queries/0_stateless/01060_avro.sh +++ b/tests/queries/0_stateless/01060_avro.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest set -e @@ -69,9 +69,6 @@ cat "$DATA_DIR"/simple.null.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --out - - - # output echo '===' output diff --git a/tests/queries/0_stateless/01069_database_memory.sql b/tests/queries/0_stateless/01069_database_memory.sql index 5aab9175c58..5d2fa4ea11e 100644 --- a/tests/queries/0_stateless/01069_database_memory.sql +++ b/tests/queries/0_stateless/01069_database_memory.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP DATABASE IF EXISTS memory_01069; CREATE DATABASE memory_01069 ENGINE = Memory; SHOW CREATE DATABASE memory_01069; diff --git a/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh b/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh index 17068dcbdf9..dcd15718416 100755 --- a/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh +++ b/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race # This is a monkey test used to trigger sanitizers. @@ -7,11 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --query="DROP DATABASE IF EXISTS dictdb_01076;" -$CLICKHOUSE_CLIENT --query="CREATE DATABASE dictdb_01076;" - $CLICKHOUSE_CLIENT --query=" -CREATE TABLE dictdb_01076.table_datarace +CREATE TABLE ${CLICKHOUSE_DATABASE}.table_datarace ( key_column UUID, value Float64 @@ -21,17 +18,17 @@ ORDER BY key_column; " $CLICKHOUSE_CLIENT --query=" -INSERT INTO dictdb_01076.table_datarace VALUES ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 1.1), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 2.2), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 3.3); +INSERT INTO ${CLICKHOUSE_DATABASE}.table_datarace VALUES ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 1.1), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 2.2), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 3.3); " $CLICKHOUSE_CLIENT --query=" -CREATE DICTIONARY IF NOT EXISTS dictdb_01076.dict_datarace +CREATE DICTIONARY IF NOT EXISTS ${CLICKHOUSE_DATABASE}.dict_datarace ( key_column UInt64, value Float64 DEFAULT 77.77 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_datarace' DB 'dictdb_01076')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_datarace' DB '${CLICKHOUSE_DATABASE}')) LIFETIME(1) LAYOUT(CACHE(SIZE_IN_CELLS 10)); " @@ -41,7 +38,7 @@ function thread1() for _ in {1..50} do # This query will be ended with exception, because source dictionary has UUID as a key type. - $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('dictdb_01076.dict_datarace', 'value', toUInt64(1));" + $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_datarace', 'value', toUInt64(1));" done } @@ -51,7 +48,7 @@ function thread2() for _ in {1..50} do # This query will be ended with exception, because source dictionary has UUID as a key type. - $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('dictdb_01076.dict_datarace', 'value', toUInt64(2));" + $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_datarace', 'value', toUInt64(2));" done } @@ -67,6 +64,5 @@ wait echo OK -$CLICKHOUSE_CLIENT --query="DROP DICTIONARY dictdb_01076.dict_datarace;" -$CLICKHOUSE_CLIENT --query="DROP TABLE dictdb_01076.table_datarace;" -$CLICKHOUSE_CLIENT --query="DROP DATABASE dictdb_01076;" +$CLICKHOUSE_CLIENT --query="DROP DICTIONARY ${CLICKHOUSE_DATABASE}.dict_datarace;" +$CLICKHOUSE_CLIENT --query="DROP TABLE ${CLICKHOUSE_DATABASE}.table_datarace;" diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index bfdea95fa9e..f05a0fed965 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest +# Tags: zookeeper, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -63,7 +63,7 @@ export -f optimize_thread; export -f insert_thread; -TIMEOUT=30 +TIMEOUT=20 # Sometimes we detach and attach tables timeout $TIMEOUT bash -c alter_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh index ba8d89aad3c..399c9e488a4 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-fasttest +# Tags: long, zookeeper, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -76,7 +76,7 @@ export -f insert_thread; export -f select_thread; -TIMEOUT=30 +TIMEOUT=20 # Selects should run successfully diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql index bdfbf2a47cf..697843be27f 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql @@ -28,7 +28,7 @@ CREATE TABLE url (n UInt64, col String) ENGINE=URL ( replace ( - 'https://localhost:8443/?query=' || 'select n, _table from ' || currentDatabase() || '.merge format CSV', ' ', '+' + 'https://localhost:' || getServerPort('https_port') || '/?query=' || 'select n, _table from ' || currentDatabase() || '.merge format CSV', ' ', '+' ), CSV ); @@ -39,7 +39,7 @@ CREATE VIEW view AS SELECT toInt64(n) as n FROM (SELECT toString(n) as n from me SELECT nonexistentsomething; -- { serverError UNKNOWN_IDENTIFIER } CREATE DICTIONARY dict (n UInt64, col String DEFAULT '42') PRIMARY KEY n -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9440 SECURE 1 USER 'default' TABLE 'url')) LIFETIME(1) LAYOUT(CACHE(SIZE_IN_CELLS 1)); +SOURCE(CLICKHOUSE(HOST 'localhost' PORT getServerPort('tcp_port_secure') SECURE 1 USER 'default' TABLE 'url')) LIFETIME(1) LAYOUT(CACHE(SIZE_IN_CELLS 1)); -- dict --> url --> merge |-> distributed -> file (1) -- |-> distributed_tf -> buffer -> file (1) diff --git a/tests/queries/0_stateless/01098_msgpack_format.sh b/tests/queries/0_stateless/01098_msgpack_format.sh index e2ae026eb27..30956ac6c7f 100755 --- a/tests/queries/0_stateless/01098_msgpack_format.sh +++ b/tests/queries/0_stateless/01098_msgpack_format.sh @@ -1,14 +1,11 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS msgpack"; $CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime('Asia/Istanbul'), datetime64 DateTime64(3, 'Asia/Istanbul'), array Array(UInt32)) ENGINE = Memory"; diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 91f7a276ea3..90128d7a8ad 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -87,7 +87,7 @@ function insert() -TIMEOUT=30 +TIMEOUT=20 create_db $TIMEOUT & sync_db $TIMEOUT & diff --git a/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql b/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql index 65a03993295..1dc727930ab 100644 --- a/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql +++ b/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql @@ -1,29 +1,21 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -CREATE TABLE database_for_dict.table_for_dict ( +CREATE TABLE table_for_dict ( CompanyID String, OSType Enum('UNKNOWN' = 0, 'WINDOWS' = 1, 'LINUX' = 2, 'ANDROID' = 3, 'MAC' = 4), SomeID Int32 ) ENGINE = Memory(); -INSERT INTO database_for_dict.table_for_dict VALUES ('First', 'WINDOWS', 1), ('Second', 'LINUX', 2); +INSERT INTO table_for_dict VALUES ('First', 'WINDOWS', 1), ('Second', 'LINUX', 2); -CREATE DICTIONARY database_for_dict.dict_with_conversion +CREATE DICTIONARY dict_with_conversion ( CompanyID String DEFAULT '', OSType String DEFAULT '', SomeID Int32 DEFAULT 0 ) PRIMARY KEY CompanyID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(MIN 1 MAX 20) LAYOUT(COMPLEX_KEY_HASHED()); -SELECT * FROM database_for_dict.dict_with_conversion ORDER BY CompanyID; - -DROP DATABASE IF EXISTS database_for_dict; +SELECT * FROM dict_with_conversion ORDER BY CompanyID; diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index 1b1f064ae0b..fed76727a27 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest # Tag no-fasttest: 45 seconds running # Creation of a database with Ordinary engine emits a warning. diff --git a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql index 3379acf4d7b..783a728e336 100644 --- a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql +++ b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest DROP DATABASE IF EXISTS conv_main; CREATE DATABASE conv_main ENGINE = MySQL('127.0.0.1:3456', conv_main, 'metrika', 'password'); -- { serverError CANNOT_CREATE_DATABASE } diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql index a324d278c12..6a818d94a58 100644 --- a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql +++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql @@ -1,11 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -use database_for_dict; - CREATE TABLE date_table ( id UInt32, @@ -24,7 +16,7 @@ CREATE DICTIONARY somedict end Date ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB currentDatabase())) LAYOUT(RANGE_HASHED()) RANGE (MIN start MAX end) LIFETIME(MIN 300 MAX 360); @@ -35,5 +27,3 @@ SELECT * from somedict; SELECT 1 FROM somedict; SHOW TABLES; - -DROP DATABASE database_for_dict; diff --git a/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql b/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql index 2a1d04e6074..78632ab2463 100644 --- a/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql +++ b/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql @@ -1,6 +1,3 @@ --- Tags: no-parallel - -DROP TABLE IF EXISTS mt; set allow_deprecated_syntax_for_merge_tree=1; CREATE TABLE mt (d Date, x String) ENGINE = MergeTree(d, x, 8192); INSERT INTO mt VALUES ('2106-02-07', 'Hello'), ('1970-01-01', 'World'); diff --git a/tests/queries/0_stateless/01144_multiword_data_types.sql b/tests/queries/0_stateless/01144_multiword_data_types.sql index cc380f82d63..56def658ae0 100644 --- a/tests/queries/0_stateless/01144_multiword_data_types.sql +++ b/tests/queries/0_stateless/01144_multiword_data_types.sql @@ -23,7 +23,7 @@ CREATE TABLE multiword_types ( SHOW CREATE TABLE multiword_types; INSERT INTO multiword_types(a) VALUES (1); -SELECT toTypeName((*,)) FROM multiword_types; +SELECT toTypeName((*,)) FROM multiword_types SETTINGS enable_named_columns_in_function_tuple = 0; CREATE TABLE unsigned_types ( a TINYINT SIGNED, @@ -43,7 +43,7 @@ CREATE TABLE unsigned_types ( SHOW CREATE TABLE unsigned_types; INSERT INTO unsigned_types(a) VALUES (1); -SELECT toTypeName((*,)) FROM unsigned_types; +SELECT toTypeName((*,)) FROM unsigned_types SETTINGS enable_named_columns_in_function_tuple = 0; SELECT CAST('42' AS DOUBLE PRECISION), CAST(42, 'NATIONAL CHARACTER VARYING'), CAST(-1 AS tinyint UnSiGnEd), CAST(65535, ' sMaLlInT signed '); diff --git a/tests/queries/0_stateless/01154_move_partition_long.sh b/tests/queries/0_stateless/01154_move_partition_long.sh index 24ec58c9c17..bdffa028846 100755 --- a/tests/queries/0_stateless/01154_move_partition_long.sh +++ b/tests/queries/0_stateless/01154_move_partition_long.sh @@ -107,7 +107,7 @@ export -f drop_partition_thread; export -f optimize_thread; export -f drop_part_thread; -TIMEOUT=60 +TIMEOUT=40 #timeout $TIMEOUT bash -c "create_drop_thread ${engines[@]}" & timeout $TIMEOUT bash -c 'insert_thread src' & diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 8344bb6f426..2ab7f883367 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-ordinary-database, no-debug +# Tags: long, no-ordinary-database, no-debug # Test is too heavy, avoid parallel run in Flaky Check # shellcheck disable=SC2119 diff --git a/tests/queries/0_stateless/01185_create_or_replace_table.sql b/tests/queries/0_stateless/01185_create_or_replace_table.sql index 11759d0bb0c..801a775e024 100644 --- a/tests/queries/0_stateless/01185_create_or_replace_table.sql +++ b/tests/queries/0_stateless/01185_create_or_replace_table.sql @@ -1,4 +1,4 @@ --- Tags: no-ordinary-database, no-parallel +-- Tags: no-ordinary-database drop table if exists t1; diff --git a/tests/queries/0_stateless/01188_attach_table_from_path.sql b/tests/queries/0_stateless/01188_attach_table_from_path.sql index d1b9493b6c2..026979a0132 100644 --- a/tests/queries/0_stateless/01188_attach_table_from_path.sql +++ b/tests/queries/0_stateless/01188_attach_table_from_path.sql @@ -1,4 +1,4 @@ --- Tags: no-replicated-database, no-parallel +-- Tags: no-replicated-database drop table if exists test; drop table if exists file; diff --git a/tests/queries/0_stateless/01202_array_auc_special.reference b/tests/queries/0_stateless/01202_array_auc_special.reference index 85c230fba58..8f3f0cf1efe 100644 --- a/tests/queries/0_stateless/01202_array_auc_special.reference +++ b/tests/queries/0_stateless/01202_array_auc_special.reference @@ -1,9 +1,9 @@ nan nan nan -0 -1 -0 0.5 1 -0.5 +0 +0.75 +1 +0.75 diff --git a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql index 3e497f9e3a4..a0cacd8bc7a 100644 --- a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql +++ b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql @@ -1,22 +1,15 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01225; -CREATE DATABASE dict_db_01225; - -CREATE TABLE dict_db_01225.dict_data (key UInt64, val UInt64) Engine=Memory(); -CREATE DICTIONARY dict_db_01225.dict +CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict ( key UInt64 DEFAULT 0, val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01225')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -SYSTEM RELOAD DICTIONARY dict_db_01225.dict; +SYSTEM RELOAD DICTIONARY dict; -DROP TABLE dict_db_01225.dict; -- { serverError CANNOT_DETACH_DICTIONARY_AS_TABLE } -DROP DICTIONARY dict_db_01225.dict; - -DROP DATABASE dict_db_01225; +DROP TABLE dict; -- { serverError CANNOT_DETACH_DICTIONARY_AS_TABLE } +DROP DICTIONARY dict; diff --git a/tests/queries/0_stateless/01232_untuple.reference b/tests/queries/0_stateless/01232_untuple.reference index 0358cde1354..3cd8eaa5611 100644 --- a/tests/queries/0_stateless/01232_untuple.reference +++ b/tests/queries/0_stateless/01232_untuple.reference @@ -2,7 +2,7 @@ hello 1 3 world 9 9 (0,1) -key tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'1\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'2\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'3\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'4\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'5\') +key tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v1\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v2\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v3\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v4\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v5\') 1 20 20 10 20 30 2 11 20 10 20 30 3 70 20 10 20 30 diff --git a/tests/queries/0_stateless/01232_untuple.sql b/tests/queries/0_stateless/01232_untuple.sql index ccefd13a772..391d08ab859 100644 --- a/tests/queries/0_stateless/01232_untuple.sql +++ b/tests/queries/0_stateless/01232_untuple.sql @@ -1,4 +1,5 @@ SET allow_experimental_analyzer = 1; +SET enable_named_columns_in_function_tuple = 1; select untuple((* except (b),)) from (select 1 a, 2 b, 3 c); select 'hello', untuple((* except (b),)), 'world' from (select 1 a, 2 b, 3 c); diff --git a/tests/queries/0_stateless/01246_buffer_flush.sh b/tests/queries/0_stateless/01246_buffer_flush.sh new file mode 100755 index 00000000000..1ca953c80d9 --- /dev/null +++ b/tests/queries/0_stateless/01246_buffer_flush.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function elapsed_sec() +{ + local expr=$1 && shift + local start end + start=$(date +%s.%N) + while ! eval "$expr"; do + sleep 0.5 + done + end=$(date +%s.%N) + $CLICKHOUSE_LOCAL -q "select floor($end-$start)" +} + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data_01256; + drop table if exists buffer_01256; + + create table data_01256 as system.numbers Engine=Memory(); +" + +echo "min" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 2, 100, /* time */ + 4, 100, /* rows */ + 1, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 5; + select count() from data_01256; +" +sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 5 ]]') +[[ $sec -ge 2 ]] || echo "Buffer flushed too early, min_time=2, flushed after $sec sec" +[[ $sec -lt 100 ]] || echo "Buffer flushed too late, max_time=100, flushed after $sec sec" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +$CLICKHOUSE_CLIENT -q "drop table buffer_01256" + +echo "max" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 100, 2, /* time */ + 0, 100, /* rows */ + 0, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 5; + select count() from data_01256; +" +sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 10 ]]') +[[ $sec -ge 2 ]] || echo "Buffer flushed too early, max_time=2, flushed after $sec sec" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +$CLICKHOUSE_CLIENT -q "drop table buffer_01256" + +echo "direct" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 100, 100, /* time */ + 0, 9, /* rows */ + 0, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 10; + select count() from data_01256; +" + +echo "drop" +$CLICKHOUSE_CLIENT -nm -q " + insert into buffer_01256 select * from system.numbers limit 10; + drop table if exists buffer_01256; + select count() from data_01256; +" + +$CLICKHOUSE_CLIENT -q "drop table data_01256" diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql deleted file mode 100644 index 66f93371c29..00000000000 --- a/tests/queries/0_stateless/01246_buffer_flush.sql +++ /dev/null @@ -1,50 +0,0 @@ --- Tags: no-fasttest - -SET function_sleep_max_microseconds_per_block = 4000000; - -drop table if exists data_01256; -drop table if exists buffer_01256; - -create table data_01256 as system.numbers Engine=Memory(); - -select 'min'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 5, 100, /* time */ - 4, 100, /* rows */ - 1, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 5; -select count() from data_01256; --- It is enough to ensure that the buffer will be flushed earlier then 2*min_time (10 sec) -select sleepEachRow(9) FORMAT Null SETTINGS function_sleep_max_microseconds_per_block=10e6; -select count() from data_01256; -drop table buffer_01256; - -select 'max'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 100, 2, /* time */ - 0, 100, /* rows */ - 0, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 5; -select count() from data_01256; --- sleep 2 (min time) + 1 (round up) + bias (1) = 4 -select sleepEachRow(2) from numbers(2) FORMAT Null; -select count() from data_01256; -drop table buffer_01256; - -select 'direct'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 100, 100, /* time */ - 0, 9, /* rows */ - 0, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 10; -select count() from data_01256; - -select 'drop'; -insert into buffer_01256 select * from system.numbers limit 10; -drop table if exists buffer_01256; -select count() from data_01256; - -drop table data_01256; diff --git a/tests/queries/0_stateless/01254_dict_create_without_db.sql b/tests/queries/0_stateless/01254_dict_create_without_db.sql index 65a2ab52d23..2d4da5af9a9 100644 --- a/tests/queries/0_stateless/01254_dict_create_without_db.sql +++ b/tests/queries/0_stateless/01254_dict_create_without_db.sql @@ -1,9 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01254; -CREATE DATABASE dict_db_01254; -USE dict_db_01254; - CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); CREATE DICTIONARY dict ( @@ -11,15 +5,12 @@ CREATE DICTIONARY dict val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01254')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; -SYSTEM RELOAD DICTIONARY dict_db_01254.dict; -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01254.dict', 'val', toUInt64(0)); -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; - -USE system; -DROP DATABASE dict_db_01254; +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SYSTEM RELOAD DICTIONARY dict; +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; diff --git a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql index 206ddeac612..11473c6ce32 100644 --- a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql +++ b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql @@ -1,26 +1,19 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01254; -CREATE DATABASE dict_db_01254; - -CREATE TABLE dict_db_01254.dict_data (key UInt64, val UInt64) Engine=Memory(); -CREATE DICTIONARY dict_db_01254.dict +CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict ( key UInt64 DEFAULT 0, val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01254')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -DETACH DATABASE dict_db_01254; -ATTACH DATABASE dict_db_01254; +DETACH DATABASE {CLICKHOUSE_DATABASE:Identifier}; +ATTACH DATABASE {CLICKHOUSE_DATABASE:Identifier}; -SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict')::Nullable(String), 'NOT_LOADED'); -SYSTEM RELOAD DICTIONARY dict_db_01254.dict; -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01254.dict', 'val', toUInt64(0)); -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; - -DROP DATABASE dict_db_01254; +SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict')::Nullable(String), 'NOT_LOADED'); +SYSTEM RELOAD DICTIONARY dict; +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; diff --git a/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql b/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql index 432256d33c2..be56806f8d6 100644 --- a/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql +++ b/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql @@ -1,12 +1,6 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -DROP TABLE IF EXISTS database_for_dict.table_for_dict; - -CREATE TABLE database_for_dict.table_for_dict +CREATE TABLE table_for_dict ( key_column UInt64, second_column UInt64, @@ -15,7 +9,7 @@ CREATE TABLE database_for_dict.table_for_dict ENGINE = MergeTree() ORDER BY key_column; -INSERT INTO database_for_dict.table_for_dict VALUES (100500, 10000000, 'Hello world'); +INSERT INTO table_for_dict VALUES (100500, 10000000, 'Hello world'); DROP DATABASE IF EXISTS ordinary_db; @@ -30,7 +24,7 @@ CREATE DICTIONARY ordinary_db.dict1 third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()) SETTINGS(max_result_bytes=1); @@ -40,10 +34,6 @@ SELECT dictGetUInt64('ordinary_db.dict1', 'second_column', toUInt64(100500)); -- SELECT 'END'; -DROP DICTIONARY IF EXISTS ordinary_db.dict1; - DROP DATABASE IF EXISTS ordinary_db; -DROP TABLE IF EXISTS database_for_dict.table_for_dict; - -DROP DATABASE IF EXISTS database_for_dict; +DROP TABLE IF EXISTS table_for_dict; diff --git a/tests/queries/0_stateless/01268_procfs_metrics.sh b/tests/queries/0_stateless/01268_procfs_metrics.sh index 4f09d197596..7d6389bb86e 100755 --- a/tests/queries/0_stateless/01268_procfs_metrics.sh +++ b/tests/queries/0_stateless/01268_procfs_metrics.sh @@ -15,7 +15,7 @@ tmp_path=$(mktemp "$CURDIR/01268_procfs_metrics.XXXXXX") trap 'rm -f $tmp_path' EXIT truncate -s1025 "$tmp_path" -$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars +$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events --storage_file_read_method=pread -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars # NOTE: that OSCPUVirtualTimeMicroseconds is in microseconds, so 1e6 is not enough. $CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM numbers(1e8) FORMAT Null" |& grep -m1 -F -o -e OSCPUVirtualTimeMicroseconds exit 0 diff --git a/tests/queries/0_stateless/01269_alias_type_differs.sql b/tests/queries/0_stateless/01269_alias_type_differs.sql index 64abcf9e367..b78e46f62c8 100644 --- a/tests/queries/0_stateless/01269_alias_type_differs.sql +++ b/tests/queries/0_stateless/01269_alias_type_differs.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS data_01269; CREATE TABLE data_01269 ( diff --git a/tests/queries/0_stateless/01272_suspicious_codecs.sql b/tests/queries/0_stateless/01272_suspicious_codecs.sql index 082a8d08675..1c1d7b58dd0 100644 --- a/tests/queries/0_stateless/01272_suspicious_codecs.sql +++ b/tests/queries/0_stateless/01272_suspicious_codecs.sql @@ -1,9 +1,5 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS codecs; --- test what should work - CREATE TABLE codecs ( a UInt8 CODEC(LZ4), diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index dadf2f35e6e..f5f31c4a563 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -10,7 +10,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -34,7 +34,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -58,7 +58,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -82,7 +82,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -106,7 +106,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -130,14 +130,14 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '0' and Settings['max_insert_threads'] = '16'; -18 +5 select count() from testX; 60 select count() from testXA; @@ -154,7 +154,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -178,14 +178,14 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and Settings['max_insert_threads'] = '16'; -18 +5 select count() from testX; 80 select count() from testXA; diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 9d74474c1a4..5918035e9c3 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -28,7 +28,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select={{ optimize_trivial_insert_select }}, max_insert_threads={{ max_insert_threads }}; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh index fb7bf5c6fc1..9a80820dd58 100755 --- a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh +++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" set allow_deprecated_database_ordinary=1; DROP DATABASE IF EXISTS 01280_db; diff --git a/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh b/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh index 3c11dc5f772..21f46a34514 100755 --- a/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh +++ b/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-fasttest +# Tags: long, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -91,7 +91,7 @@ ${CLICKHOUSE_CLIENT} -n -q " " -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c recreate_lazy_func1 2> /dev/null & timeout $TIMEOUT bash -c recreate_lazy_func2 2> /dev/null & diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh index 2d18c45406c..47fe7a9c7d9 100755 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh index b9f1f81da1a..a521accb082 100755 --- a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01323_too_many_threads_bug.sql b/tests/queries/0_stateless/01323_too_many_threads_bug.sql index c377e2c7570..5bf282808c3 100644 --- a/tests/queries/0_stateless/01323_too_many_threads_bug.sql +++ b/tests/queries/0_stateless/01323_too_many_threads_bug.sql @@ -14,11 +14,11 @@ set log_queries = 1; select x from table_01323_many_parts limit 10 format Null; system flush logs; -select arrayUniq(thread_ids) <= 4 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; +select peak_threads_usage <= 4 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; select x from table_01323_many_parts order by x limit 10 format Null; system flush logs; -select arrayUniq(thread_ids) <= 36 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts order by x%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; +select peak_threads_usage <= 36 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts order by x%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; drop table if exists table_01323_many_parts; diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index fcdfa2dec82..2b0709162a3 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 50ade3fad45..41e0a12f369 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel +# Tags: long, zookeeper CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01355_alter_column_with_order.sql b/tests/queries/0_stateless/01355_alter_column_with_order.sql index 0b1b4c42cce..405157fd891 100644 --- a/tests/queries/0_stateless/01355_alter_column_with_order.sql +++ b/tests/queries/0_stateless/01355_alter_column_with_order.sql @@ -1,28 +1,26 @@ --- Tags: no-parallel - -DROP TABLE IF EXISTS alter_test; +DROP TABLE IF EXISTS alter_01355; set allow_deprecated_syntax_for_merge_tree=1; -CREATE TABLE alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192); +CREATE TABLE alter_01355 (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192); -ALTER TABLE alter_test ADD COLUMN Added1 UInt32 FIRST; +ALTER TABLE alter_01355 ADD COLUMN Added1 UInt32 FIRST; -ALTER TABLE alter_test ADD COLUMN Added2 UInt32 AFTER NestedColumn; +ALTER TABLE alter_01355 ADD COLUMN Added2 UInt32 AFTER NestedColumn; -ALTER TABLE alter_test ADD COLUMN Added3 UInt32 AFTER ToDrop; +ALTER TABLE alter_01355 ADD COLUMN Added3 UInt32 AFTER ToDrop; -DESC alter_test; -DETACH TABLE alter_test; -ATTACH TABLE alter_test; -DESC alter_test; +DESC alter_01355; +DETACH TABLE alter_01355; +ATTACH TABLE alter_01355; +DESC alter_01355; -ALTER TABLE alter_test MODIFY COLUMN Added2 UInt32 FIRST; +ALTER TABLE alter_01355 MODIFY COLUMN Added2 UInt32 FIRST; -ALTER TABLE alter_test MODIFY COLUMN Added3 UInt32 AFTER CounterID; +ALTER TABLE alter_01355 MODIFY COLUMN Added3 UInt32 AFTER CounterID; -DESC alter_test; -DETACH TABLE alter_test; -ATTACH TABLE alter_test; -DESC alter_test; +DESC alter_01355; +DETACH TABLE alter_01355; +ATTACH TABLE alter_01355; +DESC alter_01355; -DROP TABLE IF EXISTS alter_test; +DROP TABLE IF EXISTS alter_01355; diff --git a/tests/queries/0_stateless/01355_ilike.sql b/tests/queries/0_stateless/01355_ilike.sql index 6bde62bf47e..1ceb878a5ef 100644 --- a/tests/queries/0_stateless/01355_ilike.sql +++ b/tests/queries/0_stateless/01355_ilike.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest SELECT 'Hello' ILIKE ''; SELECT 'Hello' ILIKE '%'; @@ -53,11 +53,7 @@ SELECT 'ощщЁё' ILIKE '%щ%'; SELECT 'ощЩЁё' ILIKE '%ё%'; SHOW TABLES NOT ILIKE '%'; -DROP DATABASE IF EXISTS test_01355; -CREATE DATABASE test_01355; -USE test_01355; CREATE TABLE test1 (x UInt8) ENGINE = Memory; CREATE TABLE test2 (x UInt8) ENGINE = Memory; SHOW TABLES ILIKE 'tES%'; SHOW TABLES NOT ILIKE 'TeS%'; -DROP DATABASE test_01355; diff --git a/tests/queries/0_stateless/01388_clear_all_columns.sql b/tests/queries/0_stateless/01388_clear_all_columns.sql index cc395aa7fb4..07b4fb3de90 100644 --- a/tests/queries/0_stateless/01388_clear_all_columns.sql +++ b/tests/queries/0_stateless/01388_clear_all_columns.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS test; CREATE TABLE test (x UInt8) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO test (x) VALUES (1), (2), (3); diff --git a/tests/queries/0_stateless/01391_join_on_dict_crash.sql b/tests/queries/0_stateless/01391_join_on_dict_crash.sql index 854da04b334..e056e147501 100644 --- a/tests/queries/0_stateless/01391_join_on_dict_crash.sql +++ b/tests/queries/0_stateless/01391_join_on_dict_crash.sql @@ -1,13 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS db_01391; -CREATE DATABASE db_01391; -USE db_01391; - -DROP TABLE IF EXISTS t; -DROP TABLE IF EXISTS d_src; -DROP DICTIONARY IF EXISTS d; - CREATE TABLE t (click_city_id UInt32, click_country_id UInt32) Engine = Memory; CREATE TABLE d_src (id UInt64, country_id UInt8, name String) Engine = Memory; @@ -16,14 +6,9 @@ INSERT INTO d_src VALUES (0, 0, 'n'); CREATE DICTIONARY d (id UInt32, country_id UInt8, name String) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' DB 'db_01391' table 'd_src')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' DB currentDatabase() table 'd_src')) LIFETIME(MIN 1 MAX 1) LAYOUT(HASHED()); SELECT click_country_id FROM t AS cc LEFT JOIN d ON toUInt32(d.id) = cc.click_city_id; SELECT click_country_id FROM t AS cc LEFT JOIN d ON d.country_id < 99 AND d.id = cc.click_city_id; - -DROP DICTIONARY d; -DROP TABLE t; -DROP TABLE d_src; -DROP DATABASE IF EXISTS db_01391; diff --git a/tests/queries/0_stateless/01392_column_resolve.sql b/tests/queries/0_stateless/01392_column_resolve.sql index 72b6af4576a..90a7d9b169a 100644 --- a/tests/queries/0_stateless/01392_column_resolve.sql +++ b/tests/queries/0_stateless/01392_column_resolve.sql @@ -1,16 +1,11 @@ --- Tags: no-parallel +CREATE TABLE tableConversion (conversionId String, value Nullable(Double)) ENGINE = Log(); +CREATE TABLE tableClick (clickId String, conversionId String, value Nullable(Double)) ENGINE = Log(); +CREATE TABLE leftjoin (id String) ENGINE = Log(); -DROP DATABASE IF EXISTS test_01392; -CREATE DATABASE test_01392; - -CREATE TABLE test_01392.tableConversion (conversionId String, value Nullable(Double)) ENGINE = Log(); -CREATE TABLE test_01392.tableClick (clickId String, conversionId String, value Nullable(Double)) ENGINE = Log(); -CREATE TABLE test_01392.leftjoin (id String) ENGINE = Log(); - -INSERT INTO test_01392.tableConversion(conversionId, value) VALUES ('Conversion 1', 1); -INSERT INTO test_01392.tableClick(clickId, conversionId, value) VALUES ('Click 1', 'Conversion 1', 14); -INSERT INTO test_01392.tableClick(clickId, conversionId, value) VALUES ('Click 2', 'Conversion 1', 15); -INSERT INTO test_01392.tableClick(clickId, conversionId, value) VALUES ('Click 3', 'Conversion 1', 16); +INSERT INTO tableConversion(conversionId, value) VALUES ('Conversion 1', 1); +INSERT INTO tableClick(clickId, conversionId, value) VALUES ('Click 1', 'Conversion 1', 14); +INSERT INTO tableClick(clickId, conversionId, value) VALUES ('Click 2', 'Conversion 1', 15); +INSERT INTO tableClick(clickId, conversionId, value) VALUES ('Click 3', 'Conversion 1', 16); SELECT conversion.conversionId AS myConversionId, @@ -18,19 +13,13 @@ SELECT click.myValue AS myValue FROM ( SELECT conversionId, value as myValue - FROM test_01392.tableConversion + FROM tableConversion ) AS conversion INNER JOIN ( SELECT clickId, conversionId, value as myValue - FROM test_01392.tableClick + FROM tableClick ) AS click ON click.conversionId = conversion.conversionId LEFT JOIN ( - SELECT * FROM test_01392.leftjoin + SELECT * FROM leftjoin ) AS dummy ON (dummy.id = conversion.conversionId) ORDER BY myValue; - -DROP TABLE test_01392.tableConversion; -DROP TABLE test_01392.tableClick; -DROP TABLE test_01392.leftjoin; - -DROP DATABASE test_01392; diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 1c1eb4489ee..b81bb75891d 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: replica, no-debug, no-parallel +# Tags: replica, no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01412_cache_dictionary_race.sh b/tests/queries/0_stateless/01412_cache_dictionary_race.sh index 165a461193d..9aa39652021 100755 --- a/tests/queries/0_stateless/01412_cache_dictionary_race.sh +++ b/tests/queries/0_stateless/01412_cache_dictionary_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -54,7 +54,7 @@ function drop_create_table_thread() export -f dict_get_thread; export -f drop_create_table_thread; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01415_sticking_mutations.sh b/tests/queries/0_stateless/01415_sticking_mutations.sh index 821c83fe728..b7c8768a65d 100755 --- a/tests/queries/0_stateless/01415_sticking_mutations.sh +++ b/tests/queries/0_stateless/01415_sticking_mutations.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel +# Tags: no-replicated-database set -e diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index 85fdf7ed764..9ba1fe93543 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-debug +# Tags: long, no-debug set -e @@ -27,7 +27,7 @@ function thread_ops() } export -f thread_ops -TIMEOUT=60 +TIMEOUT=30 thread_ops $TIMEOUT & wait diff --git a/tests/queries/0_stateless/01444_create_table_drop_database_race.sh b/tests/queries/0_stateless/01444_create_table_drop_database_race.sh index eb231e71525..ae74efa4e20 100755 --- a/tests/queries/0_stateless/01444_create_table_drop_database_race.sh +++ b/tests/queries/0_stateless/01444_create_table_drop_database_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race set -e @@ -8,20 +8,20 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # This test reproduces "Directory not empty" error in DROP DATABASE query. +export DB=test_$RANDOM function thread1() { while true; do -# ${CLICKHOUSE_CLIENT} --query="SHOW TABLES FROM test_01444" - ${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_01444" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" - ${CLICKHOUSE_CLIENT} --query="CREATE DATABASE IF NOT EXISTS test_01444" + ${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS $DB" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" + ${CLICKHOUSE_CLIENT} --query="CREATE DATABASE IF NOT EXISTS $DB" done } function thread2() { while true; do - ${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test_01444.t$RANDOM (x UInt8) ENGINE = MergeTree ORDER BY tuple()" 2>/dev/null + ${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS $DB.t$RANDOM (x UInt8) ENGINE = MergeTree ORDER BY tuple()" 2>/dev/null done } @@ -36,4 +36,4 @@ timeout $TIMEOUT bash -c thread2 & wait -${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_01444" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" || exit 0 +${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS ${DB}" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" || exit 0 diff --git a/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh b/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh index d83343b3cb3..2bfd350ec51 100755 --- a/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh +++ b/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race set -e @@ -36,8 +36,8 @@ function g { export -f f; export -f g; -timeout 30 bash -c f > /dev/null & -timeout 30 bash -c g > /dev/null & +timeout 20 bash -c f > /dev/null & +timeout 20 bash -c g > /dev/null & wait $CLICKHOUSE_CLIENT -q "DROP TABLE mem" diff --git a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh index 697b32a77ae..a5697a62dc2 100755 --- a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh +++ b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT --multiquery --query " SET allow_suspicious_low_cardinality_types=1; CREATE TABLE IF NOT EXISTS test_01543 (value LowCardinality(String), value2 LowCardinality(UInt64)) ENGINE=Memory(); diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.reference b/tests/queries/0_stateless/01563_distributed_query_finish.reference index c3688b553c4..b48979a492e 100644 --- a/tests/queries/0_stateless/01563_distributed_query_finish.reference +++ b/tests/queries/0_stateless/01563_distributed_query_finish.reference @@ -1,2 +1 @@ -1,0 NETWORK_ERROR=0 diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.sh b/tests/queries/0_stateless/01563_distributed_query_finish.sh index 0019c714e40..e3c5928f108 100755 --- a/tests/queries/0_stateless/01563_distributed_query_finish.sh +++ b/tests/queries/0_stateless/01563_distributed_query_finish.sh @@ -19,20 +19,25 @@ create table dist_01247 as data_01247 engine=Distributed(test_cluster_two_shards select * from dist_01247 format Null; EOL -network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'") +# NOTE: it is possible to got NETWORK_ERROR even with no-parallel, at least due to system.*_log_sender to the cloud +for ((i = 0; i < 100; ++i)); do + network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'") -opts=( - "--max_distributed_connections=1" - "--optimize_skip_unused_shards=1" - "--optimize_distributed_group_by_sharding_key=1" - "--prefer_localhost_replica=0" -) -$CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm < /dev/null & timeout $TIMEOUT bash -c kill_mutation_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index a6af1f2170d..edf93b4b39f 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -163,6 +163,7 @@ Filter column: notEquals(__table1.y, 2_UInt8) > filter is pushed down before CreatingSets CreatingSets Filter +Filter 1 3 > one condition of filter is pushed down before LEFT JOIN diff --git a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql index 1301135b4cb..2193fc7a8f4 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql +++ b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql @@ -1,3 +1,5 @@ +set query_plan_merge_filters=1; + set allow_experimental_analyzer=1; select explain from (explain actions = 1 select * from (select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%'; diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 96327536f89..685fe69642a 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -7,18 +7,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. - -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -CLICKHOUSE_USER_FILES_PATH=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/ -echo -n aaaaaaaaa > ${CLICKHOUSE_USER_FILES_PATH}/a.txt -echo -n bbbbbbbbb > ${CLICKHOUSE_USER_FILES_PATH}/b.txt -echo -n ccccccccc > ${CLICKHOUSE_USER_FILES_PATH}/c.txt +echo -n aaaaaaaaa > ${USER_FILES_PATH}/a.txt +echo -n bbbbbbbbb > ${USER_FILES_PATH}/b.txt +echo -n ccccccccc > ${USER_FILES_PATH}/c.txt echo -n ccccccccc > /tmp/c.txt -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/dir +mkdir -p ${USER_FILES_PATH}/dir ### 1st TEST in CLIENT mode. @@ -85,15 +78,15 @@ echo "${CLICKHOUSE_LOCAL} --query "'"select file('"'dir'), file('b.txt')"'";echo # Test that the function is not injective -echo -n Hello > ${CLICKHOUSE_USER_FILES_PATH}/a -echo -n Hello > ${CLICKHOUSE_USER_FILES_PATH}/b -echo -n World > ${CLICKHOUSE_USER_FILES_PATH}/c +echo -n Hello > ${USER_FILES_PATH}/a +echo -n Hello > ${USER_FILES_PATH}/b +echo -n World > ${USER_FILES_PATH}/c ${CLICKHOUSE_CLIENT} --query "SELECT file(arrayJoin(['a', 'b', 'c'])) AS s, count() GROUP BY s ORDER BY s" ${CLICKHOUSE_CLIENT} --query "SELECT s, count() FROM file('?', TSV, 's String') GROUP BY s ORDER BY s" # Restore -rm ${CLICKHOUSE_USER_FILES_PATH}/{a,b,c}.txt -rm ${CLICKHOUSE_USER_FILES_PATH}/{a,b,c} +rm ${USER_FILES_PATH}/{a,b,c}.txt +rm ${USER_FILES_PATH}/{a,b,c} rm /tmp/c.txt -rm -rf ${CLICKHOUSE_USER_FILES_PATH}/dir +rm -rf ${USER_FILES_PATH}/dir diff --git a/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh b/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh index 9167a2d306f..0e5c2862066 100755 --- a/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh +++ b/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP DATABASE IF EXISTS 01684_database_for_cache_dictionary; CREATE DATABASE 01684_database_for_cache_dictionary; diff --git a/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh b/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh index 9dd8a41ce5a..5583a9dd5e7 100755 --- a/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh +++ b/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP DATABASE IF EXISTS 01685_database_for_cache_dictionary; CREATE DATABASE 01685_database_for_cache_dictionary; diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.sql b/tests/queries/0_stateless/01710_projection_vertical_merges.sql index 2c4378bb7a4..0f80d659e92 100644 --- a/tests/queries/0_stateless/01710_projection_vertical_merges.sql +++ b/tests/queries/0_stateless/01710_projection_vertical_merges.sql @@ -1,4 +1,4 @@ --- Tags: long, no-parallel +-- Tags: long drop table if exists t; diff --git a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql index 050aa33464e..c7c525ba20e 100644 --- a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql +++ b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql @@ -25,7 +25,7 @@ create table dictst01747(some_name String, field1 String, field2 UInt8) Engine = as select 'name', 'test', 33; CREATE DICTIONARY default.dict01747 (some_name String, field1 String, field2 UInt8) -PRIMARY KEY some_name SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 +PRIMARY KEY some_name SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE dictst01747 DB currentDatabase() USER 'default')) LIFETIME(MIN 0 MAX 0) LAYOUT(COMPLEX_KEY_HASHED()); diff --git a/tests/queries/0_stateless/01747_system_session_log_long.sh b/tests/queries/0_stateless/01747_system_session_log_long.sh index ecddcb627b8..022bf488886 100755 --- a/tests/queries/0_stateless/01747_system_session_log_long.sh +++ b/tests/queries/0_stateless/01747_system_session_log_long.sh @@ -228,13 +228,13 @@ function testMySQL() echo "MySQL 'successful login' case is skipped for ${auth_type}." else executeQuery \ - <<< "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${username}', '${password}') LIMIT 1 \ + <<< "SELECT 1 FROM mysql('127.0.0.1:${CLICKHOUSE_PORT_MYSQL}', 'system', 'one', '${username}', '${password}') LIMIT 1 \ FORMAT Null" fi echo 'Wrong username' executeQueryExpectError \ - <<< "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 \ + <<< "SELECT 1 FROM mysql('127.0.0.1:${CLICKHOUSE_PORT_MYSQL}', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 \ FORMAT Null" \ | grep -Eq "Code: 279\. DB::Exception: .* invalid_${username}" @@ -246,7 +246,7 @@ function testMySQL() echo "MySQL 'wrong password' case is skipped for ${auth_type}." else executeQueryExpectError \ - <<< "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 \ + <<< "SELECT 1 FROM mysql('127.0.0.1:${CLICKHOUSE_PORT_MYSQL}', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 \ FORMAT Null" | grep -Eq "Code: 279\. DB::Exception: .* ${username}" fi } @@ -267,11 +267,11 @@ function testMySQL() ## Loging\Logout ## CH is being able to log into itself via PostgreSQL protocol but query fails. #executeQueryExpectError \ - # <<< "SELECT 1 FROM postgresql('localhost:9005', 'system', 'one', '${username}', '${password}') LIMIT 1 FORMAT Null" \ + # <<< "SELECT 1 FROM postgresql('localhost:${CLICKHOUSE_PORT_POSTGRESQL', 'system', 'one', '${username}', '${password}') LIMIT 1 FORMAT Null" \ # Wrong username executeQueryExpectError \ - <<< "SELECT 1 FROM postgresql('localhost:9005', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 FORMAT Null" \ + <<< "SELECT 1 FROM postgresql('localhost:${CLICKHOUSE_PORT_POSTGRESQL}', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 FORMAT Null" \ | grep -Eq "Invalid user or password" if [[ "${auth_type}" == "no_password" ]] @@ -281,7 +281,7 @@ function testMySQL() else # Wrong password executeQueryExpectError \ - <<< "SELECT 1 FROM postgresql('localhost:9005', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 FORMAT Null" \ + <<< "SELECT 1 FROM postgresql('localhost:${CLICKHOUSE_PORT_POSTGRESQL}', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 FORMAT Null" \ | grep -Eq "Invalid user or password" fi } diff --git a/tests/queries/0_stateless/01748_dictionary_table_dot.sql b/tests/queries/0_stateless/01748_dictionary_table_dot.sql index a2364fdf823..993d2e1a635 100644 --- a/tests/queries/0_stateless/01748_dictionary_table_dot.sql +++ b/tests/queries/0_stateless/01748_dictionary_table_dot.sql @@ -22,7 +22,7 @@ CREATE DICTIONARY test_dict `value` String ) PRIMARY KEY key1, key2 -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE `test.txt` PASSWORD '' DB currentDatabase())) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE `test.txt` PASSWORD '' DB currentDatabase())) LIFETIME(MIN 1 MAX 3600) LAYOUT(COMPLEX_KEY_HASHED()); diff --git a/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql b/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql index 1eee4090112..3ebc85c47f7 100644 --- a/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql +++ b/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql @@ -10,7 +10,7 @@ CREATE DICTIONARY dict1 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 TABLE 'dict1')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'dict1')) LAYOUT(DIRECT()); SELECT * FROM dict1; --{serverError BAD_ARGUMENTS} @@ -24,7 +24,7 @@ CREATE DICTIONARY 01780_db.dict2 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 DATABASE '01780_db' TABLE 'dict2')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DATABASE '01780_db' TABLE 'dict2')) LAYOUT(DIRECT()); SELECT * FROM 01780_db.dict2; --{serverError BAD_ARGUMENTS} @@ -45,7 +45,7 @@ CREATE DICTIONARY 01780_db.dict3 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 TABLE 'dict3_source' DATABASE '01780_db')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'dict3_source' DATABASE '01780_db')) LAYOUT(DIRECT()); SELECT * FROM 01780_db.dict3; diff --git a/tests/queries/0_stateless/01825_type_json_btc.sh b/tests/queries/0_stateless/01825_type_json_btc.sh index 1e74166e7a7..ebc5482de7a 100755 --- a/tests/queries/0_stateless/01825_type_json_btc.sh +++ b/tests/queries/0_stateless/01825_type_json_btc.sh @@ -5,10 +5,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -cp $CUR_DIR/data_json/btc_transactions.json ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +mkdir -p ${CLICKHOUSE_USER_FILES_UNIQUE}/ +rm -rf "${CLICKHOUSE_USER_FILES_UNIQUE:?}"/* +cp $CUR_DIR/data_json/btc_transactions.json ${CLICKHOUSE_USER_FILES_UNIQUE}/ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS btc" @@ -27,4 +26,4 @@ ${CLICKHOUSE_CLIENT} -q "SELECT data.out.spending_outpoints AS outpoints FROM bt ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS btc" -rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/btc_transactions.json +rm ${CLICKHOUSE_USER_FILES_UNIQUE}/btc_transactions.json diff --git a/tests/queries/0_stateless/01825_type_json_multiple_files.sh b/tests/queries/0_stateless/01825_type_json_multiple_files.sh index 089b7991784..453e7a3c78e 100755 --- a/tests/queries/0_stateless/01825_type_json_multiple_files.sh +++ b/tests/queries/0_stateless/01825_type_json_multiple_files.sh @@ -1,23 +1,22 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') -for f in "$user_files_path"/01825_file_*.json; do +for f in "${USER_FILES_PATH:?}/${CLICKHOUSE_DATABASE}"_*.json; do [ -e $f ] && rm $f done for i in {0..5}; do - echo "{\"k$i\": 100}" > "$user_files_path"/01825_file_$i.json + echo "{\"k$i\": 100}" > "$USER_FILES_PATH/${CLICKHOUSE_DATABASE}_$i.json" done ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_files (file String, data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON')" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" @@ -25,7 +24,7 @@ ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ - SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ + SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON') \ ORDER BY _file LIMIT 3" --max_threads 1 --min_insert_block_size_rows 1 --max_insert_block_size 1 --max_block_size 1 --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file, data FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 @@ -34,11 +33,11 @@ ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ - SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ - WHERE _file IN ('01825_file_1.json', '01825_file_3.json')" --allow_experimental_object_type 1 + SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON') \ + WHERE _file IN ('${CLICKHOUSE_DATABASE}_1.json', '${CLICKHOUSE_DATABASE}_3.json')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" -rm "$user_files_path"/01825_file_*.json +rm "$USER_FILES_PATH"/${CLICKHOUSE_DATABASE}_*.json diff --git a/tests/queries/0_stateless/01825_type_json_schema_inference.sh b/tests/queries/0_stateless/01825_type_json_schema_inference.sh index 5fca608d8bb..e0c283b2230 100755 --- a/tests/queries/0_stateless/01825_type_json_schema_inference.sh +++ b/tests/queries/0_stateless/01825_type_json_schema_inference.sh @@ -10,11 +10,10 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_inference (id UInt64, obj Object(Nullable('json')), s String) \ ENGINE = MergeTree ORDER BY id" --allow_experimental_object_type 1 -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -filename="${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json" +filename="${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json" echo '{"id": 1, "obj": {"k1": 1, "k2": {"k3": 2, "k4": [{"k5": 3}, {"k5": 4}]}}, "s": "foo"}' > $filename echo '{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}' >> $filename diff --git a/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh b/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh index 36a2165329b..8336229a643 100755 --- a/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh +++ b/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" SET allow_experimental_bigint_types = 1; diff --git a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 index bd20d34b684..c2d85cefb18 100644 --- a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 +++ b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 @@ -72,7 +72,7 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id + 2; -- { serverE SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 } -SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } +SELECT * FROM t1 JOIN t2_nullable as t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id; diff --git a/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 index e4b704247b2..13703771ac8 100644 --- a/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 +++ b/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 @@ -70,7 +70,7 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id + 2; -- { serverE SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 } -SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } +SELECT * FROM t1 JOIN t2_nullable as t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id; -- { serverError 48 } diff --git a/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh b/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh index b5be39a91df..f79637a7635 100755 --- a/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh +++ b/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none @@ -7,23 +6,27 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -q "drop user if exists u_01889" -${CLICKHOUSE_CLIENT} -q "drop role if exists r_01889" -${CLICKHOUSE_CLIENT} -q "drop policy if exists t_01889_filter on t_01889" -${CLICKHOUSE_CLIENT} -q "create user u_01889 identified with plaintext_password by 'dfsdffdf5t123'" -${CLICKHOUSE_CLIENT} -q "revoke all on *.* from u_01889" -${CLICKHOUSE_CLIENT} -q "create role r_01889" +USER=u_01889$RANDOM +ROLE=r_01889$RANDOM +POLICY=t_01889_filter$RANDOM + +${CLICKHOUSE_CLIENT} -q "drop user if exists $USER" +${CLICKHOUSE_CLIENT} -q "drop role if exists ${ROLE}" +${CLICKHOUSE_CLIENT} -q "drop policy if exists ${POLICY} on t_01889" +${CLICKHOUSE_CLIENT} -q "create user $USER identified with plaintext_password by 'dfsdffdf5t123'" +${CLICKHOUSE_CLIENT} -q "revoke all on *.* from $USER" +${CLICKHOUSE_CLIENT} -q "create role ${ROLE}" ${CLICKHOUSE_CLIENT} -q "create table t_01889(a Int64, user_id String) Engine=MergeTree order by a" -${CLICKHOUSE_CLIENT} -q "insert into t_01889 select number, 'u_01889' from numbers(1000)" +${CLICKHOUSE_CLIENT} -q "insert into t_01889 select number, '$USER' from numbers(1000)" ${CLICKHOUSE_CLIENT} -q "insert into t_01889 select number, 'xxxxxxx' from numbers(1000)" -${CLICKHOUSE_CLIENT} -q "grant select on t_01889 to r_01889" -${CLICKHOUSE_CLIENT} -q "create row policy t_01889_filter ON t_01889 FOR SELECT USING user_id = user() TO r_01889" -${CLICKHOUSE_CLIENT} -q "grant r_01889 to u_01889" -${CLICKHOUSE_CLIENT} -q "alter user u_01889 default role r_01889 settings none" +${CLICKHOUSE_CLIENT} -q "grant select on t_01889 to ${ROLE}" +${CLICKHOUSE_CLIENT} -q "create row policy ${POLICY} ON t_01889 FOR SELECT USING user_id = user() TO ${ROLE}" +${CLICKHOUSE_CLIENT} -q "grant ${ROLE} to $USER" +${CLICKHOUSE_CLIENT} -q "alter user $USER default role ${ROLE} settings none" -${CLICKHOUSE_CLIENT_BINARY} --database=${CLICKHOUSE_DATABASE} --user=u_01889 --password=dfsdffdf5t123 --query="select count() from t_01889" +${CLICKHOUSE_CLIENT_BINARY} --database=${CLICKHOUSE_DATABASE} --user=$USER --password=dfsdffdf5t123 --query="select count() from t_01889" -${CLICKHOUSE_CLIENT} -q "drop user u_01889" -${CLICKHOUSE_CLIENT} -q "drop policy t_01889_filter on t_01889" -${CLICKHOUSE_CLIENT} -q "drop role r_01889" +${CLICKHOUSE_CLIENT} -q "drop user $USER" +${CLICKHOUSE_CLIENT} -q "drop policy ${POLICY} on t_01889" +${CLICKHOUSE_CLIENT} -q "drop role ${ROLE}" ${CLICKHOUSE_CLIENT} -q "drop table t_01889" diff --git a/tests/queries/0_stateless/01889_sqlite_read_write.sh b/tests/queries/0_stateless/01889_sqlite_read_write.sh index 30496af46f6..63ce5dc909c 100755 --- a/tests/queries/0_stateless/01889_sqlite_read_write.sh +++ b/tests/queries/0_stateless/01889_sqlite_read_write.sh @@ -6,15 +6,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p "${user_files_path}/" -chmod 777 "${user_files_path}" - export CURR_DATABASE="test_01889_sqllite_${CLICKHOUSE_DATABASE}" -DB_PATH=${user_files_path}/${CURR_DATABASE}_db1 +DB_PATH=${USER_FILES_PATH}/${CURR_DATABASE}_db1 DB_PATH2=$CUR_DIR/${CURR_DATABASE}_db2 function cleanup() diff --git a/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh b/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh index 3676f1429b2..853445daf3f 100755 --- a/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh +++ b/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS dictionary_array_source_table; CREATE TABLE dictionary_array_source_table diff --git a/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh b/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh index 6aecb20329a..0b555cf82c2 100755 --- a/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh +++ b/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS dictionary_nullable_source_table; CREATE TABLE dictionary_nullable_source_table diff --git a/tests/queries/0_stateless/01910_view_dictionary.sql b/tests/queries/0_stateless/01910_view_dictionary.sql index 05a67889825..51f46decadd 100644 --- a/tests/queries/0_stateless/01910_view_dictionary.sql +++ b/tests/queries/0_stateless/01910_view_dictionary.sql @@ -34,7 +34,7 @@ CREATE DICTIONARY flat_dictionary value_ru String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' PASSWORD '' TABLE 'dictionary_source_view')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' PASSWORD '' TABLE 'dictionary_source_view')) LIFETIME(MIN 1 MAX 1000) LAYOUT(FLAT()); diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh index 5e1600a0673..edffc0a3807 100755 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel +# Tags: long, zookeeper CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=error @@ -50,7 +50,7 @@ function insert_thread export -f insert_thread; export -f optimize_thread; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c insert_thread 2> /dev/null & timeout $TIMEOUT bash -c insert_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.sql b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index 6287156daaf..ba42795333c 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -16,7 +16,6 @@ CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRo CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; -- ENABLE LOGS -SET parallel_view_processing=0; SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; SET log_queries=1; diff --git a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh index 4b230e4f738..0aedef028a2 100755 --- a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh +++ b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh @@ -6,10 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/ -cp $CUR_DIR/data_zstd/test_01946.zstd ${user_files_path}/ +mkdir -p ${USER_FILES_PATH}/ +cp $CUR_DIR/data_zstd/test_01946.zstd ${USER_FILES_PATH}/ ${CLICKHOUSE_CLIENT} --multiline --multiquery --query " set min_chunk_bytes_for_parallel_parsing=10485760; diff --git a/tests/queries/0_stateless/02003_compress_bz2.sh b/tests/queries/0_stateless/02003_compress_bz2.sh index b17effb20b6..edc433ad69b 100755 --- a/tests/queries/0_stateless/02003_compress_bz2.sh +++ b/tests/queries/0_stateless/02003_compress_bz2.sh @@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02003="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02003}" diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.reference b/tests/queries/0_stateless/02010_array_index_bad_cast.reference index e69de29bb2d..e22493782f0 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.reference +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.reference @@ -0,0 +1,3 @@ +1 +0 +0 diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.sql b/tests/queries/0_stateless/02010_array_index_bad_cast.sql index 14162e0d2e2..590e60eb42e 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.sql +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.sql @@ -1,3 +1,4 @@ --- This query throws exception about uncomparable data types (but at least it does not introduce bad cast in code). SET allow_suspicious_low_cardinality_types=1; -SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); -- { serverError ILLEGAL_COLUMN } +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14 00:00:01'::DateTime64(7))); +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize(NULL)); diff --git a/tests/queries/0_stateless/02012_compress_lz4.sh b/tests/queries/0_stateless/02012_compress_lz4.sh index aad437c8011..700bff613da 100755 --- a/tests/queries/0_stateless/02012_compress_lz4.sh +++ b/tests/queries/0_stateless/02012_compress_lz4.sh @@ -4,7 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02012="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02012}" diff --git a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh index ea703d69aa5..29bc28849c8 100755 --- a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh +++ b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh @@ -6,30 +6,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt done ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "detach table file_log;" @@ -40,4 +36,4 @@ ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_ ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt diff --git a/tests/queries/0_stateless/02023_storage_filelog.sh b/tests/queries/0_stateless/02023_storage_filelog.sh index 51c8dc8ab3e..798ffe66ed9 100755 --- a/tests/queries/0_stateless/02023_storage_filelog.sh +++ b/tests/queries/0_stateless/02023_storage_filelog.sh @@ -6,57 +6,52 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt -mv ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/j.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt +mv ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/j.txt -rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +rm ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "detach table file_log;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/f.txt -mv ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/g.txt -mv ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/f.txt +mv ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/g.txt +mv ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt for i in {150..200} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt done for i in {200..250} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/i.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/i.txt done ${CLICKHOUSE_CLIENT} --query "attach table file_log;" @@ -68,11 +63,11 @@ ${CLICKHOUSE_CLIENT} --query "attach table file_log;" # should no records return ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" -truncate ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 +truncate ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 # exception happend ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" 2>&1 | grep -q "Code: 33" && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02024_storage_filelog_mv.sh b/tests/queries/0_stateless/02024_storage_filelog_mv.sh index 33c8693648c..a47ca6c7507 100755 --- a/tests/queries/0_stateless/02024_storage_filelog_mv.sh +++ b/tests/queries/0_stateless/02024_storage_filelog_mv.sh @@ -7,21 +7,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "drop table if exists mv;" ${CLICKHOUSE_CLIENT} --query "create Materialized View mv engine=MergeTree order by k as select * from file_log;" @@ -39,17 +34,17 @@ done ${CLICKHOUSE_CLIENT} --query "select * from mv order by k;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt done while true; do @@ -62,4 +57,4 @@ ${CLICKHOUSE_CLIENT} --query "select * from mv order by k;" ${CLICKHOUSE_CLIENT} --query "drop table mv;" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh index f027b61c3ef..7e414b8863e 100755 --- a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh +++ b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh @@ -6,42 +6,37 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt -rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +rm ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" @@ -51,11 +46,11 @@ ${CLICKHOUSE_CLIENT} --query "attach table file_log;" # should no records return ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" -truncate ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 +truncate ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 # exception happend ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" 2>&1 | grep -q "Code: 33" && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02026_storage_filelog_largefile.sh b/tests/queries/0_stateless/02026_storage_filelog_largefile.sh index b0a9a4357f3..b7377fb026a 100755 --- a/tests/queries/0_stateless/02026_storage_filelog_largefile.sh +++ b/tests/queries/0_stateless/02026_storage_filelog_largefile.sh @@ -7,34 +7,27 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* - -chmod 777 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +chmod 777 ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ for i in {1..10} do - ${CLICKHOUSE_CLIENT} --query "insert into function file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" + ${CLICKHOUSE_CLIENT} --query "insert into function file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt32, v UInt32) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt32, v UInt32) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select count() from file_log settings stream_like_engine_allow_direct_select=1;" for i in {11..20} do - ${CLICKHOUSE_CLIENT} --query "insert into function file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" + ${CLICKHOUSE_CLIENT} --query "insert into function file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" done ${CLICKHOUSE_CLIENT} --query "select count() from file_log settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 6fcfef23cc7..b34bdc9f670 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -1,17 +1,16 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-replicated-database +# Tags: no-fasttest, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp touch $CAPN_PROTO_FILE -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas -SERVER_SCHEMADIR=test_02030 +SERVER_SCHEMADIR=${CLICKHOUSE_DATABASE} mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index eab44e74d88..afdb14c4191 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -5,10 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -FILE_PATH="${user_files_path}/file" +FILE_PATH="${USER_FILES_PATH}/file" mkdir -p ${FILE_PATH} chmod 777 ${FILE_PATH} diff --git a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh index 7ea6739e932..25ad4f37e45 100755 --- a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh +++ b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -DATA_FILE=$CLICKHOUSE_TMP/test_02103_null.data +DATA_FILE=${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}_null.data # Wrapper for clickhouse-client to always output in JSONEachRow format, that # way format settings will not affect output. diff --git a/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh index a6e704093a2..07d40539358 100755 --- a/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh +++ b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh @@ -1,20 +1,17 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -DATA_FILE=$USER_FILES_PATH/test_02103.data +DATA_FILE=$USER_FILES_PATH/${CLICKHOUSE_DATABASE}.data FORMATS=('TSVWithNames' 'TSVWithNamesAndTypes' 'TSVRawWithNames' 'TSVRawWithNamesAndTypes' 'CSVWithNames' 'CSVWithNamesAndTypes' 'JSONCompactEachRowWithNames' 'JSONCompactEachRowWithNamesAndTypes') for format in "${FORMATS[@]}" do $CLICKHOUSE_CLIENT -q "SELECT number, range(number + 10) AS array, toString(number) AS string FROM numbers(10) FORMAT $format" > $DATA_FILE - $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') ORDER BY number SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" + $CLICKHOUSE_CLIENT -q "SELECT * FROM file('${CLICKHOUSE_DATABASE}.data', '$format', 'number UInt64, array Array(UInt64), string String') ORDER BY number SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" done rm $DATA_FILE diff --git a/tests/queries/0_stateless/02104_json_strings_nullable_string.sh b/tests/queries/0_stateless/02104_json_strings_nullable_string.sh index b3b156b5787..d46b0704b0e 100755 --- a/tests/queries/0_stateless/02104_json_strings_nullable_string.sh +++ b/tests/queries/0_stateless/02104_json_strings_nullable_string.sh @@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$USER_FILES_PATH/test_02104_null.data echo -e '{"s" : "NULLSome string"}' > $DATA_FILE diff --git a/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh b/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh index c79b5d0eee5..38c68ca0005 100755 --- a/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh +++ b/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh @@ -5,13 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p "${user_files_path}/" -chmod 777 ${user_files_path} - -FILE_PATH="${user_files_path}/test_table_function_file" +FILE_PATH="${USER_FILES_PATH}/test_table_function_file" function cleanup() { diff --git a/tests/queries/0_stateless/02115_write_buffers_finalize.sh b/tests/queries/0_stateless/02115_write_buffers_finalize.sh index d8a3c29bbbd..fb3a77f6105 100755 --- a/tests/queries/0_stateless/02115_write_buffers_finalize.sh +++ b/tests/queries/0_stateless/02115_write_buffers_finalize.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest # Tag no-fasttest: depends on brotli and bzip2 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh b/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh index 400bf2a56fa..d925c962da4 100755 --- a/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh +++ b/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02118_deserialize_whole_text.sh b/tests/queries/0_stateless/02118_deserialize_whole_text.sh index ccbfc5abe97..d544f1452a9 100755 --- a/tests/queries/0_stateless/02118_deserialize_whole_text.sh +++ b/tests/queries/0_stateless/02118_deserialize_whole_text.sh @@ -1,65 +1,65 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-replicated-database +# Tags: no-fasttest, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -DATA_FILE=$USER_FILES_PATH/data_02118 +DATA_FILE=$USER_FILES_PATH/${CLICKHOUSE_DATABASE} +FILE=${CLICKHOUSE_DATABASE} echo "[\"[1,2,3]trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Array(UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Array(UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-02trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Date')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Date')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-02trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Date32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Date32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-01 03:00:01trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x DateTime')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x DateTime')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-01 03:00:01.0000trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x DateTime64(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x DateTime64(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"42trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x UInt32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x UInt32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"42.4242trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Decimal32(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Decimal32(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"255.255.255.255trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "255.255.255.255trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv4')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'TSV', 'x IPv4')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' echo "255.255.255.255trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv4')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'CSV', 'x IPv4')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "[\"255.255.255.255trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"{1:2, 2:3}trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Map(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Map(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"(1, 2)trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Tuple(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Tuple(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"ed9fd45d-6287-47c1-ad9f-d45d628767c1trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x UUID')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x UUID')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' rm $DATA_FILE diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference index 2d9f236ada9..e0cc8f0ce63 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference @@ -1,8 +1,8 @@ -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 -18 36 27 36 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data -18 18 18 18 -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 -18 36 27 36 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent +18 18 9 18 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent +18 9 9 9 +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent +18 18 9 18 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent 18 18 18 18 diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql index 465c8d6136c..fdd75b91b1f 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql @@ -1,6 +1,6 @@ -- Tags: long -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent'; drop table if exists test sync; drop table if exists test_mv_a sync; @@ -35,7 +35,7 @@ select (select sum(c) from test_mv_c where test='case1'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent'; set deduplicate_blocks_in_dependent_materialized_views=1; @@ -53,7 +53,7 @@ select (select sum(c) from test_mv_c where test='case2'); -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent'; set deduplicate_blocks_in_dependent_materialized_views=0; @@ -70,7 +70,7 @@ select (select sum(c) from test_mv_b where test='case3'), (select sum(c) from test_mv_c where test='case3'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent'; set deduplicate_blocks_in_dependent_materialized_views=1; diff --git a/tests/queries/0_stateless/02125_query_views_log.sql b/tests/queries/0_stateless/02125_query_views_log.sql index ba50902ebea..d2d19b76a1f 100644 --- a/tests/queries/0_stateless/02125_query_views_log.sql +++ b/tests/queries/0_stateless/02125_query_views_log.sql @@ -8,7 +8,7 @@ create table dst (key Int) engine=Null(); create materialized view mv1 to dst as select * from src; create materialized view mv2 to dst as select * from src; -insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=0; +insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=1; system flush logs; -- { echo } diff --git a/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh b/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh index 0abf411d38f..4a169897520 100755 --- a/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh +++ b/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - DATA_FILE=$USER_FILES_PATH/test_02125.data echo "number=1" > $DATA_FILE diff --git a/tests/queries/0_stateless/02126_fix_filelog.sh b/tests/queries/0_stateless/02126_fix_filelog.sh index b266b582428..0e136a34c62 100755 --- a/tests/queries/0_stateless/02126_fix_filelog.sh +++ b/tests/queries/0_stateless/02126_fix_filelog.sh @@ -6,14 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" @@ -21,8 +15,8 @@ ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=Fil ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('/tmp/aaa.csv', 'CSV');" 2>&1 | grep -q "Code: 36" && echo 'OK' || echo 'FAIL'; ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('/tmp/aaa.csv', 'CSV');" 2>&1 | grep -q "Code: 36" && echo 'OK' || echo 'FAIL'; -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02129_skip_quoted_fields.sh b/tests/queries/0_stateless/02129_skip_quoted_fields.sh index ac702d3c750..701d7a30b68 100755 --- a/tests/queries/0_stateless/02129_skip_quoted_fields.sh +++ b/tests/queries/0_stateless/02129_skip_quoted_fields.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02130_parse_quoted_null.sh b/tests/queries/0_stateless/02130_parse_quoted_null.sh index 0c72d0e85a7..44e6ee93599 100755 --- a/tests/queries/0_stateless/02130_parse_quoted_null.sh +++ b/tests/queries/0_stateless/02130_parse_quoted_null.sh @@ -1,14 +1,12 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -DATA_FILE=$USER_FILES_PATH/test_02130.data -SELECT_QUERY="select * from file('test_02130.data', 'CustomSeparated', 'x Nullable(Float64), y Nullable(UInt64)') settings input_format_parallel_parsing=0, format_custom_escaping_rule='Quoted'" +DATA_FILE=$USER_FILES_PATH/${CLICKHOUSE_DATABASE}.data +SELECT_QUERY="select * from file('${CLICKHOUSE_DATABASE}.data', 'CustomSeparated', 'x Nullable(Float64), y Nullable(UInt64)') settings input_format_parallel_parsing=0, format_custom_escaping_rule='Quoted'" $CLICKHOUSE_CLIENT -q "drop table if exists test_02130" diff --git a/tests/queries/0_stateless/02149_external_schema_inference.sh b/tests/queries/0_stateless/02149_external_schema_inference.sh index 41f8bfee2bc..edb4e915701 100755 --- a/tests/queries/0_stateless/02149_external_schema_inference.sh +++ b/tests/queries/0_stateless/02149_external_schema_inference.sh @@ -1,20 +1,19 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -FILE_NAME=test_$CLICKHOUSE_TEST_UNIQUE_NAME.data +FILE_NAME=test_${CLICKHOUSE_TEST_UNIQUE_NAME}_${CLICKHOUSE_DATABASE}.data DATA_FILE=$USER_FILES_PATH/$FILE_NAME touch $DATA_FILE -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('$FILE_NAME', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas -SERVER_SCHEMADIR=test_02149 +SERVER_SCHEMADIR=${CLICKHOUSE_DATABASE} mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR cp -r $CLIENT_SCHEMADIR/* $SCHEMADIR/$SERVER_SCHEMADIR/ diff --git a/tests/queries/0_stateless/02149_schema_inference.sh b/tests/queries/0_stateless/02149_schema_inference.sh index 856549f2215..fba1e6e9137 100755 --- a/tests/queries/0_stateless/02149_schema_inference.sh +++ b/tests/queries/0_stateless/02149_schema_inference.sh @@ -1,18 +1,16 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_$CLICKHOUSE_TEST_UNIQUE_NAME.data -DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME - +DATA_FILE=${CLICKHOUSE_USER_FILES:?}/$FILE_NAME touch $DATA_FILE -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('$FILE_NAME', 'Template', 'val1 char') settings format_template_row='nonexist'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} echo "TSV" diff --git a/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh b/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh index 8de2ab8c57a..bf247817323 100755 --- a/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh +++ b/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh @@ -1,14 +1,13 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir $USER_FILES_PATH/test_02149 -FILE_NAME=test_02149/data.Parquet +mkdir $USER_FILES_PATH/${CLICKHOUSE_DATABASE}/ +FILE_NAME=data.Parquet DATA_FILE=$USER_FILES_PATH/$FILE_NAME $CLICKHOUSE_CLIENT -q "select number as num, concat('Str: ', toString(number)) as str, [number, number + 1] as arr from numbers(10) format Parquet" > $DATA_FILE diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.reference b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.reference new file mode 100644 index 00000000000..d3d171221e8 --- /dev/null +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.reference @@ -0,0 +1,10 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh new file mode 100755 index 00000000000..6d715604d93 --- /dev/null +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Tags: long, distributed, no-debug, no-tsan, no-msan, no-ubsan, no-asan, no-random-settings, no-random-merge-tree-settings + +# shellcheck disable=SC2154 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +opts=( + --join_algorithm='parallel_hash' +) + +$CLICKHOUSE_CLIENT -nq " + CREATE TABLE t1(a UInt32, b UInt32) ENGINE=MergeTree ORDER BY (); + INSERT INTO t1 SELECT number, number FROM numbers_mt(1e6); + + CREATE TABLE t2(a UInt32, b UInt32) ENGINE=MergeTree ORDER BY (); + INSERT INTO t2 SELECT number, number FROM numbers_mt(1e6); +" + +queries_without_preallocation=() +queries_with_preallocation=() + +run_new_query() { + query_id1="hash_table_sizes_stats_joins_$RANDOM$RANDOM" + # when we see a query for the first time we only collect it stats when execution ends. preallocation will happen only on the next run + queries_without_preallocation+=("$query_id1") + $CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id1" -q "$1" --format Null + + query_id2="hash_table_sizes_stats_joins_$RANDOM$RANDOM" + queries_with_preallocation+=("$query_id2") + $CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id2" -q "$1" --format Null +} + +run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.a = y.a" +# it only matters what columns from the right table are part of the join key, as soon as we change them - it is a new cache entry +run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.a = y.b" +run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y USING (a, b)" + +# we already had a join on t2.a, so cache should be populated +query_id="hash_table_sizes_stats_joins_$RANDOM$RANDOM" +queries_with_preallocation+=("$query_id") +$CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id" -q "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.b = y.a" --format Null +# the same query with a different alias for the t2 +query_id="hash_table_sizes_stats_joins_$RANDOM$RANDOM" +queries_with_preallocation+=("$query_id") +$CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id" -q "SELECT * FROM t1 AS x INNER JOIN t2 AS z ON x.b = z.a" --format Null + +# now t1 is the right table +run_new_query "SELECT * FROM t2 AS x INNER JOIN t1 AS y ON x.a = y.a" + +################################## + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" + +for i in "${!queries_without_preallocation[@]}"; do + $CLICKHOUSE_CLIENT --param_query_id="${queries_without_preallocation[$i]}" -q " + -- the old analyzer is not supported + SELECT sum(if(getSetting('allow_experimental_analyzer'), ProfileEvents['HashJoinPreallocatedElementsInHashTables'] = 0, 1)) + FROM system.query_log + WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() AND type = 'QueryFinish' + " +done + +for i in "${!queries_with_preallocation[@]}"; do + $CLICKHOUSE_CLIENT --param_query_id="${queries_with_preallocation[$i]}" -q " + -- the old analyzer is not supported + SELECT sum(if(getSetting('allow_experimental_analyzer'), ProfileEvents['HashJoinPreallocatedElementsInHashTables'] > 0, 1)) + FROM system.query_log + WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() AND type = 'QueryFinish' + " +done diff --git a/tests/queries/0_stateless/02167_format_from_file_extension.sh b/tests/queries/0_stateless/02167_format_from_file_extension.sh index 14985233524..0a0efff3228 100755 --- a/tests/queries/0_stateless/02167_format_from_file_extension.sh +++ b/tests/queries/0_stateless/02167_format_from_file_extension.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -7,28 +7,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function cleanup() { - # this command expects an error message like 'Code: 107. DB::Exception: Received <...> nonexist.txt doesn't exist. (FILE_DOESNT_EXIST)' - user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') - rm $user_files_path/test_02167.* + rm ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}_test_02167.* } trap cleanup EXIT for format in TSV TabSeparated TSVWithNames TSVWithNamesAndTypes CSV Parquet ORC Arrow JSONEachRow JSONCompactEachRow CustomSeparatedWithNamesAndTypes do - $CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.$format', 'auto', 'x UInt64') select * from numbers(2)" - $CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format')" - $CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format', '$format')" + $CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.$format', 'auto', 'x UInt64') select * from numbers(2)" + $CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.$format')" + $CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.$format', '$format')" done -$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.bin', 'auto', 'x UInt64') select * from numbers(2)" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'auto', 'x UInt64')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'RowBinary', 'x UInt64')" +$CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.bin', 'auto', 'x UInt64') select * from numbers(2)" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.bin', 'auto', 'x UInt64')" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.bin', 'RowBinary', 'x UInt64')" -$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.ndjson', 'auto', 'x UInt64') select * from numbers(2)" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson', 'JSONEachRow', 'x UInt64')" +$CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.ndjson', 'auto', 'x UInt64') select * from numbers(2)" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.ndjson')" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.ndjson', 'JSONEachRow', 'x UInt64')" -$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.messagepack', 'auto', 'x UInt64') select * from numbers(2)" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack') settings input_format_msgpack_number_of_columns=1" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack', 'MsgPack', 'x UInt64')" +$CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.messagepack', 'auto', 'x UInt64') select * from numbers(2)" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.messagepack') settings input_format_msgpack_number_of_columns=1" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.messagepack', 'MsgPack', 'x UInt64')" diff --git a/tests/queries/0_stateless/02185_orc_corrupted_file.sh b/tests/queries/0_stateless/02185_orc_corrupted_file.sh index 12510ae3836..8cf4334845d 100755 --- a/tests/queries/0_stateless/02185_orc_corrupted_file.sh +++ b/tests/queries/0_stateless/02185_orc_corrupted_file.sh @@ -5,7 +5,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') cp $CUR_DIR/data_orc/corrupted.orc $USER_FILES_PATH/ ${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh index 0345a0e6394..dc3cb0de110 100755 --- a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh +++ b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -cp /etc/clickhouse-server/users.xml "$CURDIR"/users.xml +cp ${CLICKHOUSE_CONFIG_DIR}/users.xml "$CURDIR"/users.xml sed -i 's/<\/password>/c64c5e4e53ea1a9f1427d2713b3a22bbebe8940bc807adaf654744b1568c70ab<\/password_sha256_hex>/g' "$CURDIR"/users.xml sed -i 's//1<\/access_management>/g' "$CURDIR"/users.xml diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh index d49c3610852..73d1c2b9b42 100755 --- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -q "insert into table function file(data.jsonl, 'JSONEachRow', 'x UInt32 default 42, y String') select number as x, 'String' as y from numbers(10)" $CLICKHOUSE_CLIENT -q "drop table if exists test" diff --git a/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh index 37fdde95ea7..344452767cc 100755 --- a/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh +++ b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh @@ -6,9 +6,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - function cleanup() { ${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS ${CURR_DATABASE}" @@ -18,7 +15,7 @@ trap cleanup EXIT export CURR_DATABASE="test_01889_sqllite_${CLICKHOUSE_DATABASE}" -DB_PATH=${user_files_path}/${CURR_DATABASE}_db1 +DB_PATH=${USER_FILES_PATH}/${CURR_DATABASE}_db1 ${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" DROP DATABASE IF EXISTS ${CURR_DATABASE}; diff --git a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql index 6d86d995143..72b6cc06f26 100644 --- a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql +++ b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: long, no-parallel, no-object-storage +-- Tags: long, no-object-storage -- no-object-storage: Avoid flakiness due to cache / buffer usage SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require too many retries due to 100 partitions, so disable fault injections diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh index ce545a27317..d4a4e54acbd 100755 --- a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh - -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02240.data DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME diff --git a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh index e03c62cfc5f..0f37bff45d6 100755 --- a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh +++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh @@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02242.data DATA_FILE=$USER_FILES_PATH/$FILE_NAME diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh index 8ff6e28b123..7bfeb747cc2 100755 --- a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh +++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh @@ -5,7 +5,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02245.parquet DATA_FILE=$USER_FILES_PATH/$FILE_NAME diff --git a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh index 233db7a534d..07c2a33c4d5 100755 --- a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh +++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh @@ -1,12 +1,11 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02149.data DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME diff --git a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh index e8e3bf88ac4..3385f62af38 100755 --- a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh +++ b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh @@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02247.data DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME diff --git a/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh b/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh index 523b5934543..76133df2b37 100755 --- a/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh +++ b/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh @@ -1,13 +1,12 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -FILE_NAME=test_02247.data +FILE_NAME=${CLICKHOUSE_DATABASE}.data DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME touch $DATA_FILE diff --git a/tests/queries/0_stateless/02270_errors_in_files.sh b/tests/queries/0_stateless/02270_errors_in_files.sh index 517547c6ef8..ab8bb28787d 100755 --- a/tests/queries/0_stateless/02270_errors_in_files.sh +++ b/tests/queries/0_stateless/02270_errors_in_files.sh @@ -13,8 +13,6 @@ echo "Error" > "${CLICKHOUSE_TMP}"/test_02270_2.csv ${CLICKHOUSE_LOCAL} --query "SELECT * FROM file('${CLICKHOUSE_TMP}/test_02270*.csv', CSV, 'a String, b String')" 2>&1 | grep -o "test_02270_2.csv" ${CLICKHOUSE_LOCAL} --query "SELECT * FROM file('${CLICKHOUSE_TMP}/test_02270*.csv', CSV, 'a String, b String')" --input_format_parallel_parsing 0 2>&1 | grep -o "test_02270_2.csv" -user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') - ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE FUNCTION file('test_02270_1.csv') SELECT 'Hello', 'World'" ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE FUNCTION file('test_02270_2.csv') SELECT 'Error'" @@ -27,9 +25,9 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE FUNCTION file('test_02270_2.csv. ${CLICKHOUSE_CLIENT} --query "SELECT * FROM file('test_02270*.csv.gz', 'CSV', 'a String, b String')" 2>&1 | grep -o -m1 "test_02270_2.csv.gz" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM file('test_02270*.csv.gz', 'CSV', 'a String, b String')" --input_format_parallel_parsing 0 2>&1 | grep -o -m1 "test_02270_2.csv.gz" -rm "${CLICKHOUSE_TMP}"/test_02270_1.csv -rm "${CLICKHOUSE_TMP}"/test_02270_2.csv -rm "${user_files_path}"/test_02270_1.csv -rm "${user_files_path}"/test_02270_2.csv -rm "${user_files_path}"/test_02270_1.csv.gz -rm "${user_files_path}"/test_02270_2.csv.gz +rm -f "${CLICKHOUSE_TMP}"/test_02270_1.csv +rm -f "${CLICKHOUSE_TMP}"/test_02270_2.csv +rm -f "${USER_FILES_PATH}"/test_02270_1.csv +rm -f "${USER_FILES_PATH}"/test_02270_2.csv +rm -f "${USER_FILES_PATH}"/test_02270_1.csv.gz +rm -f "${USER_FILES_PATH}"/test_02270_2.csv.gz diff --git a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh index 2f6167c3ddf..59528d97b93 100755 --- a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh +++ b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - cp $CURDIR/data_mysql_dump/dump*.sql $USER_FILES_PATH $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)') order by x, y" diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index 4eae5a1abb4..bf605d6f591 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -5,8 +5,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - DATA_FILE=$USER_FILES_PATH/data_02293 $CLICKHOUSE_CLIENT -q "drop table if exists test_02293" diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql index 4406a05df0c..ecf4f9cab93 100644 --- a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql @@ -1,3 +1,5 @@ +set enable_named_columns_in_function_tuple = 0; + select arrayMap(x -> 2 * x, []); select toTypeName(arrayMap(x -> 2 * x, [])); select arrayMap((x, y) -> x + y, [], []); diff --git a/tests/queries/0_stateless/02297_regex_parsing_file_names.sh b/tests/queries/0_stateless/02297_regex_parsing_file_names.sh index 5973e24844a..666cb4d87fc 100755 --- a/tests/queries/0_stateless/02297_regex_parsing_file_names.sh +++ b/tests/queries/0_stateless/02297_regex_parsing_file_names.sh @@ -5,29 +5,21 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. +rm -rf ${USER_FILES_PATH}/file_{0..10}.csv -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +echo '0' > ${USER_FILES_PATH}/file_0.csv +echo '0' > ${USER_FILES_PATH}/file_1.csv +echo '0' > ${USER_FILES_PATH}/file_2.csv +echo '0' > ${USER_FILES_PATH}/file_3.csv +echo '0' > ${USER_FILES_PATH}/file_4.csv +echo '0' > ${USER_FILES_PATH}/file_5.csv +echo '0' > ${USER_FILES_PATH}/file_6.csv +echo '0' > ${USER_FILES_PATH}/file_7.csv +echo '0' > ${USER_FILES_PATH}/file_8.csv +echo '0' > ${USER_FILES_PATH}/file_9.csv +echo '0' > ${USER_FILES_PATH}/file_10.csv -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/ - -rm -rf ${CLICKHOUSE_USER_FILES_PATH}/file_{0..10}.csv - -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_0.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_1.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_2.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_3.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_4.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_5.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_6.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_7.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_8.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_9.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_10.csv - -# echo '' > ${CLICKHOUSE_USER_FILES_PATH}/file_10.csv +# echo '' > ${USER_FILES_PATH}/file_10.csv ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_regex;" @@ -36,5 +28,5 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_regex (id UInt64) ENGINE = MergeTree() o ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_regex SELECT * FROM file('file_{0..10}.csv','CSV');" ${CLICKHOUSE_CLIENT} -q "SELECT count() from t_regex;" -rm -rf ${CLICKHOUSE_USER_FILES_PATH}/file_{0..10}.csv; +rm -rf ${USER_FILES_PATH}/file_{0..10}.csv; ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_regex;" diff --git a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh index 650faf6985e..89e5c827a48 100755 --- a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh +++ b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh @@ -5,10 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') touch $USER_FILES_PATH/data.capnp -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02327 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR diff --git a/tests/queries/0_stateless/02350_views_max_insert_threads.sql b/tests/queries/0_stateless/02350_views_max_insert_threads.sql index 25e0fdeadba..a4f7e2546ed 100644 --- a/tests/queries/0_stateless/02350_views_max_insert_threads.sql +++ b/tests/queries/0_stateless/02350_views_max_insert_threads.sql @@ -10,7 +10,7 @@ create materialized view t_mv Engine = Null AS select now() as ts, max(a) from t insert into t select * from numbers_mt(10e6) settings max_threads = 16, max_insert_threads=16, max_block_size=100000; system flush logs; -select arrayUniq(thread_ids)>=16 from system.query_log where +select peak_threads_usage>=16 from system.query_log where event_date >= yesterday() and current_database = currentDatabase() and type = 'QueryFinish' and diff --git a/tests/queries/0_stateless/02353_compression_level.sh b/tests/queries/0_stateless/02353_compression_level.sh index 8d6a9c899ad..9e102d12fed 100755 --- a/tests/queries/0_stateless/02353_compression_level.sh +++ b/tests/queries/0_stateless/02353_compression_level.sh @@ -1,12 +1,11 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest # Tag no-fasttest: depends on brotli and bzip2 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02353="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02353}" diff --git a/tests/queries/0_stateless/02358_file_default_value.sh b/tests/queries/0_stateless/02358_file_default_value.sh index a7c4c17c129..0fd97a09546 100755 --- a/tests/queries/0_stateless/02358_file_default_value.sh +++ b/tests/queries/0_stateless/02358_file_default_value.sh @@ -4,7 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02357="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02357}" diff --git a/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh b/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh index b58cfd7ec21..50e07ca8612 100755 --- a/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh +++ b/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh @@ -15,7 +15,7 @@ echo " true - 9000 + ${CLICKHOUSE_PORT_TCP} ${SAFE_DIR} diff --git a/tests/queries/0_stateless/02372_data_race_in_avro.sh b/tests/queries/0_stateless/02372_data_race_in_avro.sh index 50a7ae1e3c5..49c34e31923 100755 --- a/tests/queries/0_stateless/02372_data_race_in_avro.sh +++ b/tests/queries/0_stateless/02372_data_race_in_avro.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh b/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh index 3461287d28a..e95bda2adfb 100755 --- a/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh +++ b/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - cp $CURDIR/data_avro/corrupted.avro $USER_FILES_PATH/ $CLICKHOUSE_CLIENT -q "select * from file(corrupted.avro)" 2>&1 | grep -F -q "Cannot read compressed data" && echo "OK" || echo "FAIL" diff --git a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh index 80743a97dd0..ae08941da63 100755 --- a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh +++ b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh @@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') UNIQ_DEST_PATH=$USER_FILES_PATH/test-02383-$RANDOM-$RANDOM mkdir -p $UNIQ_DEST_PATH diff --git a/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh b/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh index 8aad68ffe5c..3028451a5f5 100755 --- a/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh +++ b/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh @@ -5,11 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') mkdir -p $USER_FILES_PATH/test_02402 cp $CURDIR/data_capnp/overflow.capnp $USER_FILES_PATH/test_02402/ -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('test_02402/overflow.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02402 diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index a152066a460..8dd8910c858 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -876,7 +876,6 @@ tryBase58Decode tumble tumbleEnd tumbleStart -tuple tupleConcat tupleDivide tupleDivideByNumber diff --git a/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh b/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh index df304eeeba5..72b55144348 100755 --- a/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh +++ b/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh @@ -9,10 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation. -CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/ -echo -e "1,1\n2,a\nb,3\n4,4\n5,c\n6,6" > ${CLICKHOUSE_USER_FILES_PATH}/a.csv +echo -e "1,1\n2,a\nb,3\n4,4\n5,c\n6,6" > ${USER_FILES_PATH}/a.csv ${CLICKHOUSE_CLIENT} --query "drop table if exists data;" ${CLICKHOUSE_CLIENT} --query "create table data (A UInt8, B UInt8) engine=MergeTree() order by A;" @@ -23,12 +20,12 @@ sleep 2 ${CLICKHOUSE_CLIENT} --query "select * except (time) from file('errors_server', 'CSV', 'time DateTime, database Nullable(String), table Nullable(String), offset UInt32, reason String, raw_data String');" # Client side -${CLICKHOUSE_CLIENT} --input_format_allow_errors_num 4 --input_format_record_errors_file_path "${CLICKHOUSE_USER_FILES_PATH}/errors_client" --query "insert into data(A, B) format CSV" < ${CLICKHOUSE_USER_FILES_PATH}/a.csv +${CLICKHOUSE_CLIENT} --input_format_allow_errors_num 4 --input_format_record_errors_file_path "${USER_FILES_PATH}/errors_client" --query "insert into data(A, B) format CSV" < ${USER_FILES_PATH}/a.csv sleep 2 ${CLICKHOUSE_CLIENT} --query "select * except (time) from file('errors_client', 'CSV', 'time DateTime, database Nullable(String), table Nullable(String), offset UInt32, reason String, raw_data String');" # Restore ${CLICKHOUSE_CLIENT} --query "drop table if exists data;" -rm ${CLICKHOUSE_USER_FILES_PATH}/a.csv -rm ${CLICKHOUSE_USER_FILES_PATH}/errors_server -rm ${CLICKHOUSE_USER_FILES_PATH}/errors_client +rm ${USER_FILES_PATH}/a.csv +rm ${USER_FILES_PATH}/errors_server +rm ${USER_FILES_PATH}/errors_client diff --git a/tests/queries/0_stateless/02422_allow_implicit_no_password.sh b/tests/queries/0_stateless/02422_allow_implicit_no_password.sh index 013c367e079..3c433856be2 100755 --- a/tests/queries/0_stateless/02422_allow_implicit_no_password.sh +++ b/tests/queries/0_stateless/02422_allow_implicit_no_password.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -cp /etc/clickhouse-server/users.xml "$CURDIR"/users.xml +cp ${CLICKHOUSE_CONFIG_DIR}/users.xml "$CURDIR"/users.xml sed -i 's/<\/password>/c64c5e4e53ea1a9f1427d2713b3a22bbebe8940bc807adaf654744b1568c70ab<\/password_sha256_hex>/g' "$CURDIR"/users.xml sed -i 's//1<\/access_management>/g' "$CURDIR"/users.xml diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference index bd3b689ca3c..d997b7ba830 100644 --- a/tests/queries/0_stateless/02447_drop_database_replica.reference +++ b/tests/queries/0_stateless/02447_drop_database_replica.reference @@ -12,11 +12,21 @@ t 2 rdb_default 1 1 s1 r1 1 2 -2 -2 +skip inactive s1 r1 OK 2 0 s1 r2 QUEUED 2 0 s2 r1 QUEUED 2 0 +s1 r1 OK 2 0 +s1 r2 QUEUED 2 0 +s2 r1 QUEUED 2 0 +timeout on active +2 +2 +s1 r1 OK 3 0 +s1 r2 QUEUED 3 0 +s2 r1 QUEUED 3 0 +s9 r9 QUEUED 3 0 +drop replica 2 rdb_default 1 1 s1 r1 1 rdb_default 1 2 s1 r2 0 @@ -24,6 +34,9 @@ rdb_default 1 2 s1 r2 0 2 t t2 +t22 t3 +t33 t4 +t44 rdb_default_4 1 1 s1 r1 1 diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 1604d527f2b..93a5fcee8e2 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -33,10 +33,27 @@ $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_na $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it" # Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +echo 'skip inactive' +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" | sort timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort +# And that it still throws TIMEOUT_EXCEEDED for active replicas +echo 'timeout on active' +db9="${db}_9" +$CLICKHOUSE_CLIENT -q "create database $db9 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's9', 'r9')" +$CLICKHOUSE_CLIENT -q "detach database $db9" +$CLICKHOUSE_CLIENT -q "insert into system.zookeeper(name, path, value) values ('active', '/test/$CLICKHOUSE_DATABASE/rdb/replicas/s9|r9', '$($CLICKHOUSE_CLIENT -q "select serverUUID()")')" + +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=none_only_active -q "create table $db.t22 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t33 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t44 (n int) engine=Log" | sort + +$CLICKHOUSE_CLIENT -q "attach database $db9" +$CLICKHOUSE_CLIENT -q "drop database $db9" + +echo 'drop replica' + $CLICKHOUSE_CLIENT -q "detach database $db3" $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db" $CLICKHOUSE_CLIENT -q "attach database $db3" 2>/dev/null diff --git a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh index 05de3f05562..7906f2917c4 100755 --- a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh +++ b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh @@ -5,11 +5,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') +cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/${CLICKHOUSE_DATABASE}_10m_rows.csv.xz -cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/ +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('${CLICKHOUSE_DATABASE}_10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=1000000000" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('${CLICKHOUSE_DATABASE}_10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=100000000" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=1000000000" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=100000000" - -rm $USER_FILES_PATH/10m_rows.csv.xz +rm $USER_FILES_PATH/${CLICKHOUSE_DATABASE}_10m_rows.csv.xz diff --git a/tests/queries/0_stateless/02457_bz2_concatenated.sh b/tests/queries/0_stateless/02457_bz2_concatenated.sh index 96e23cbfa2a..a9991cf44e7 100755 --- a/tests/queries/0_stateless/02457_bz2_concatenated.sh +++ b/tests/queries/0_stateless/02457_bz2_concatenated.sh @@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02457="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02457}" diff --git a/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh b/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh index b8430307ea3..b86385b72c4 100755 --- a/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh +++ b/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh @@ -5,28 +5,26 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir $user_files_path/d1 -touch $user_files_path/d1/text1.txt +mkdir $USER_FILES_PATH/d1 +touch $USER_FILES_PATH/d1/text1.txt for i in {1..2} do - echo $i$'\t'$i >> $user_files_path/d1/text1.txt + echo $i$'\t'$i >> $USER_FILES_PATH/d1/text1.txt done -mkdir $user_files_path/d1/d2 -touch $user_files_path/d1/d2/text2.txt +mkdir $USER_FILES_PATH/d1/d2 +touch $USER_FILES_PATH/d1/d2/text2.txt for i in {3..4} do - echo $i$'\t'$i >> $user_files_path/d1/d2/text2.txt + echo $i$'\t'$i >> $USER_FILES_PATH/d1/d2/text2.txt done -mkdir $user_files_path/d1/d2/d3 -touch $user_files_path/d1/d2/d3/text3.txt +mkdir $USER_FILES_PATH/d1/d2/d3 +touch $USER_FILES_PATH/d1/d2/d3/text3.txt for i in {5..6} do - echo $i$'\t'$i >> $user_files_path/d1/d2/d3/text3.txt + echo $i$'\t'$i >> $USER_FILES_PATH/d1/d2/d3/text3.txt done ${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/*','TSV', 'Index UInt8, Number UInt8')" | sort --numeric-sort @@ -35,9 +33,9 @@ ${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/*/tex*','TSV', 'Index UInt8, Nu ${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/**/tex*','TSV', 'Index UInt8, Number UInt8')" | sort --numeric-sort -rm $user_files_path/d1/d2/d3/text3.txt -rmdir $user_files_path/d1/d2/d3 -rm $user_files_path/d1/d2/text2.txt -rmdir $user_files_path/d1/d2 -rm $user_files_path/d1/text1.txt -rmdir $user_files_path/d1 +rm $USER_FILES_PATH/d1/d2/d3/text3.txt +rmdir $USER_FILES_PATH/d1/d2/d3 +rm $USER_FILES_PATH/d1/d2/text2.txt +rmdir $USER_FILES_PATH/d1/d2 +rm $USER_FILES_PATH/d1/text1.txt +rmdir $USER_FILES_PATH/d1 diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.sh b/tests/queries/0_stateless/02475_bson_each_row_format.sh index 474a6cd0e47..2975b40c868 100755 --- a/tests/queries/0_stateless/02475_bson_each_row_format.sh +++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-debug +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh index 6899b31d1d9..935ea03a947 100755 --- a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh +++ b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh @@ -38,7 +38,7 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE test_race_condition_landing (number Int64, export -f drop_mv; export -f insert; -TIMEOUT=55 +TIMEOUT=50 for i in {1..4} do diff --git a/tests/queries/0_stateless/02482_capnp_list_of_structs.sh b/tests/queries/0_stateless/02482_capnp_list_of_structs.sh index 9d78b9893dd..a04c631c411 100755 --- a/tests/queries/0_stateless/02482_capnp_list_of_structs.sh +++ b/tests/queries/0_stateless/02482_capnp_list_of_structs.sh @@ -5,10 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') touch $USER_FILES_PATH/data.capnp -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02482 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR diff --git a/tests/queries/0_stateless/02483_capnp_decimals.sh b/tests/queries/0_stateless/02483_capnp_decimals.sh index ef545a5539f..bc19b63fc8b 100755 --- a/tests/queries/0_stateless/02483_capnp_decimals.sh +++ b/tests/queries/0_stateless/02483_capnp_decimals.sh @@ -5,19 +5,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -touch $USER_FILES_PATH/data.capnp +touch $CLICKHOUSE_USER_FILES/data.capnp -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02483 -mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR -cp -r $CLIENT_SCHEMADIR/02483_* $SCHEMADIR/$SERVER_SCHEMADIR/ +mkdir -p $CLICKHOUSE_SCHEMA_FILES/$SERVER_SCHEMADIR +cp -r $CLIENT_SCHEMADIR/02483_* $CLICKHOUSE_SCHEMA_FILES/$SERVER_SCHEMADIR/ $CLICKHOUSE_CLIENT -q "insert into function file(02483_data.capnp, auto, 'decimal32 Decimal32(3), decimal64 Decimal64(6)') select 42.42, 4242.424242 settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message', engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "select * from file(02483_data.capnp) settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message'" $CLICKHOUSE_CLIENT -q "select * from file(02483_data.capnp, auto, 'decimal64 Decimal64(6), decimal32 Decimal32(3)') settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message'" -rm $USER_FILES_PATH/data.capnp -rm $USER_FILES_PATH/02483_data.capnp +rm $CLICKHOUSE_USER_FILES/data.capnp +rm $CLICKHOUSE_USER_FILES/02483_data.capnp diff --git a/tests/queries/0_stateless/02494_query_cache_empty_tuple.reference b/tests/queries/0_stateless/02494_query_cache_empty_tuple.reference new file mode 100644 index 00000000000..50e44edaecb --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_empty_tuple.reference @@ -0,0 +1,2 @@ +() 0 +() 0 diff --git a/tests/queries/0_stateless/02494_query_cache_empty_tuple.sql b/tests/queries/0_stateless/02494_query_cache_empty_tuple.sql new file mode 100644 index 00000000000..8e133143ef8 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_empty_tuple.sql @@ -0,0 +1,2 @@ +SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true; +SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 4a4e898c5bd..77ef213b36d 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -332,12 +332,13 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) - Aggregating - Expression ((Before GROUP BY + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - ReadFromSystemNumbers + Filter ((WHERE + (Projection + Before ORDER BY))) + Filter (HAVING) + Aggregating + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 1 2 diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh b/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh index d3a8743b880..e389cf410e8 100755 --- a/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh +++ b/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh @@ -1,18 +1,15 @@ #!/usr/bin/env bash - -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${USER_FILES_PATH:?}/${CLICKHOUSE_DATABASE} -mkdir -p $user_files_path/test_02504 - -cp $CURDIR/data_ua_parser/os.yaml ${user_files_path}/test_02504/ -cp $CURDIR/data_ua_parser/browser.yaml ${user_files_path}/test_02504/ -cp $CURDIR/data_ua_parser/device.yaml ${user_files_path}/test_02504/ +cp $CURDIR/data_ua_parser/os.yaml ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/ +cp $CURDIR/data_ua_parser/browser.yaml ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/ +cp $CURDIR/data_ua_parser/device.yaml ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/ $CLICKHOUSE_CLIENT -n --query=" drop dictionary if exists regexp_os; @@ -29,7 +26,7 @@ create dictionary regexp_os os_v4_replacement String default '0' ) PRIMARY KEY(regex) -SOURCE(YAMLRegExpTree(PATH '${user_files_path}/test_02504/os.yaml')) +SOURCE(YAMLRegExpTree(PATH '${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/os.yaml')) LIFETIME(0) LAYOUT(regexp_tree); @@ -41,7 +38,7 @@ create dictionary regexp_browser v2_replacement String default '0' ) PRIMARY KEY(regex) -SOURCE(YAMLRegExpTree(PATH '${user_files_path}/test_02504/browser.yaml')) +SOURCE(YAMLRegExpTree(PATH '${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/browser.yaml')) LIFETIME(0) LAYOUT(regexp_tree); @@ -53,7 +50,7 @@ create dictionary regexp_device model_replacement String ) PRIMARY KEY(regex) -SOURCE(YAMLRegExpTree(PATH '${user_files_path}/test_02504/device.yaml')) +SOURCE(YAMLRegExpTree(PATH '${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/device.yaml')) LIFETIME(0) LAYOUT(regexp_tree); @@ -84,4 +81,4 @@ drop dictionary if exists regexp_device; drop table if exists user_agents; " -rm -rf "$user_files_path/test_02504" +rm -rf ${USER_FILES_PATH:?}/${CLICKHOUSE_DATABASE} diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh index 7211372f2f7..68a87a14320 100755 --- a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh +++ b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh @@ -1,13 +1,10 @@ #!/usr/bin/env bash - # Tags: use-vectorscan, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - mkdir -p $USER_FILES_PATH/test_02504 yaml=$USER_FILES_PATH/test_02504/test.yaml diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference deleted file mode 100644 index 8d0f56ba185..00000000000 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference +++ /dev/null @@ -1,173 +0,0 @@ -query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='sipHash64(x)' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -Hello -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -Hello -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -Hello -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='y' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh deleted file mode 100755 index dccb680be42..00000000000 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-parallel, long - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -function run_with_custom_key { - echo "query='$1' with custom_key='$2'" - for prefer_localhost_replica in 0 1; do - for filter_type in 'default' 'range'; do - for max_replicas in {1..3}; do - echo "filter_type='$filter_type' max_replicas=$max_replicas prefer_localhost_replica=$prefer_localhost_replica" - query="$1 SETTINGS max_parallel_replicas=$max_replicas\ - , parallel_replicas_custom_key='$2'\ - , parallel_replicas_custom_key_filter_type='$filter_type'\ - , prefer_localhost_replica=$prefer_localhost_replica" - $CLICKHOUSE_CLIENT --query="$query" - done - done - done -} - -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key"; - -$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String) ENGINE = MergeTree ORDER BY x"; -$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key VALUES ('Hello')"; - -run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)" "sipHash64(x)" - -$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" - -$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)" -$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key SELECT toString(number), number % 3 FROM numbers(1000)" - -function run_count_with_custom_key { - run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y" "$1" -} - -run_count_with_custom_key "y" -run_count_with_custom_key "cityHash64(y)" -run_count_with_custom_key "cityHash64(y) + 1" - -$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" - -$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference new file mode 100644 index 00000000000..1bb07f0d916 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference @@ -0,0 +1,177 @@ +query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt)' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT * FROM 02535_custom_key_mt' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh new file mode 100755 index 00000000000..fad43ea9070 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Tags: no-parallel, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function run_with_custom_key { + echo "query='$1' with custom_key='$2'" + for filter_type in 'default' 'range'; do + for max_replicas in {1..3}; do + echo "filter_type='$filter_type' max_replicas=$max_replicas" + query="$1 SETTINGS max_parallel_replicas=$max_replicas\ +, parallel_replicas_custom_key='$2'\ +, parallel_replicas_custom_key_filter_type='$filter_type'\ +, parallel_replicas_for_non_replicated_merge_tree=1 \ +, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'" + $CLICKHOUSE_CLIENT --query="$query" + done + done +} + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_mt"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_mt (x String) ENGINE = MergeTree ORDER BY x"; +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_mt VALUES ('Hello')"; + +run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt)" "sipHash64(x)" +run_with_custom_key "SELECT * FROM 02535_custom_key_mt" "sipHash64(x)" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_mt" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_mt (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_mt SELECT toString(number), number % 3 FROM numbers(1000)" + +function run_count_with_custom_key_distributed { + run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_distributed "y" +run_count_with_custom_key_distributed "cityHash64(y)" +run_count_with_custom_key_distributed "cityHash64(y) + 1" + +function run_count_with_custom_key_merge_tree { + run_with_custom_key "SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_merge_tree "y" +run_count_with_custom_key_merge_tree "cityHash64(y)" +run_count_with_custom_key_merge_tree "cityHash64(y) + 1" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) as t1 JOIN 02535_custom_key_mt USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_mt" diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference new file mode 100644 index 00000000000..c6526b506d3 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference @@ -0,0 +1,177 @@ +query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt)' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT * FROM 02535_custom_key_rmt' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh new file mode 100755 index 00000000000..6350f5027f9 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Tags: no-parallel, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function run_with_custom_key { + echo "query='$1' with custom_key='$2'" + for filter_type in 'default' 'range'; do + for max_replicas in {1..3}; do + echo "filter_type='$filter_type' max_replicas=$max_replicas" + query="$1 SETTINGS max_parallel_replicas=$max_replicas\ +, parallel_replicas_custom_key='$2'\ +, parallel_replicas_custom_key_filter_type='$filter_type'\ +, parallel_replicas_for_non_replicated_merge_tree=1 \ +, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'" + $CLICKHOUSE_CLIENT --query="$query" + done + done +} + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_rmt"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_rmt (x String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_02535', 'r1') ORDER BY x"; +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_rmt VALUES ('Hello')"; + +run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt)" "sipHash64(x)" +run_with_custom_key "SELECT * FROM 02535_custom_key_rmt" "sipHash64(x)" + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_rmt_hash"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_rmt_hash (x String, y UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_02535_hash', 'r1') ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_rmt_hash SELECT toString(number), number % 3 FROM numbers(1000)" + +function run_count_with_custom_key { + run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key "y" +run_count_with_custom_key "cityHash64(y)" +run_count_with_custom_key "cityHash64(y) + 1" + +function run_count_with_custom_key_merge_tree { + run_with_custom_key "SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_merge_tree "y" +run_count_with_custom_key_merge_tree "cityHash64(y)" +run_count_with_custom_key_merge_tree "cityHash64(y) + 1" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) as t1 JOIN 02535_custom_key_rmt_hash USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_rmt_hash" diff --git a/tests/queries/0_stateless/02541_tuple_element_with_null.sql b/tests/queries/0_stateless/02541_tuple_element_with_null.sql index d2062b60d49..e1581ce3755 100644 --- a/tests/queries/0_stateless/02541_tuple_element_with_null.sql +++ b/tests/queries/0_stateless/02541_tuple_element_with_null.sql @@ -9,7 +9,7 @@ SETTINGS index_granularity = 8192; INSERT INTO test_tuple_element VALUES (tuple(1,2)), (tuple(NULL, 3)); -SELECT +SELECT tupleElement(tuple, 'k1', 0) fine_k1_with_0, tupleElement(tuple, 'k1', NULL) k1_with_null, tupleElement(tuple, 'k2', 0) k2_with_0, diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 70bcd7f255b..9bb0c022752 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -29,16 +29,20 @@ WHERE type_1 = \'all\' ExpressionTransform × 2 (Filter) FilterTransform × 2 - (Aggregating) - ExpressionTransform × 2 - AggregatingTransform × 2 - Copy 1 → 2 - (Expression) - ExpressionTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Filter) + FilterTransform × 2 + (Filter) + FilterTransform × 2 + (Aggregating) + ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Filter) @@ -64,10 +68,14 @@ ExpressionTransform × 2 ExpressionTransform × 2 AggregatingTransform × 2 Copy 1 → 2 - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Filter) + FilterTransform + (Filter) + FilterTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Aggregating) diff --git a/tests/queries/0_stateless/02661_read_from_archive.lib b/tests/queries/0_stateless/02661_read_from_archive.lib index 908b6bd38d2..56f1a0f163c 100644 --- a/tests/queries/0_stateless/02661_read_from_archive.lib +++ b/tests/queries/0_stateless/02661_read_from_archive.lib @@ -6,10 +6,10 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh function read_archive_file() { - $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2" - $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2" - $CLICKHOUSE_CLIENT --query "DESC file('${user_files_path}/$1')" - $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$1')" + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${USER_FILES_PATH}/$1') ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${USER_FILES_PATH}/$1') ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "DESC file('${USER_FILES_PATH}/$1')" + $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${USER_FILES_PATH}/$1')" $CLICKHOUSE_CLIENT --query "SELECT * FROM 02661_archive_table ORDER BY 1, 2" $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table" } @@ -20,16 +20,14 @@ function run_archive_test() { extension_without_dot=$(echo $1 | sed -e 's/\.//g') FILE_PREFIX="02661_read_from_archive_${CLICKHOUSE_DATABASE}_$extension_without_dot" - user_files_path=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -o "/[^[:space:]]*nonexist.txt" | awk '{gsub("/nonexist.txt","",$1); print $1}') - touch ${FILE_PREFIX}_data0.csv echo -e "1,2\n3,4" > ${FILE_PREFIX}_data1.csv echo -e "5,6\n7,8" > ${FILE_PREFIX}_data2.csv echo -e "9,10\n11,12" > ${FILE_PREFIX}_data3.csv - eval "$2 ${user_files_path}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data0.csv ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null" - eval "$2 ${user_files_path}/${FILE_PREFIX}_archive2.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data3.csv > /dev/null" - eval "$2 ${user_files_path}/${FILE_PREFIX}_archive3.$1 ${FILE_PREFIX}_data2.csv ${FILE_PREFIX}_data3.csv > /dev/null" + eval "$2 ${USER_FILES_PATH}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data0.csv ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null" + eval "$2 ${USER_FILES_PATH}/${FILE_PREFIX}_archive2.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data3.csv > /dev/null" + eval "$2 ${USER_FILES_PATH}/${FILE_PREFIX}_archive3.$1 ${FILE_PREFIX}_data2.csv ${FILE_PREFIX}_data3.csv > /dev/null" echo "archive1 data1.csv" read_archive_file "${FILE_PREFIX}_archive1.$1 :: ${FILE_PREFIX}_data1.csv" @@ -44,10 +42,10 @@ function run_archive_test() { echo "archive* {2..3}.csv" read_archive_file "${FILE_PREFIX}_archive*.$1 :: ${FILE_PREFIX}_data{2..3}.csv" - $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" - $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${USER_FILES_PATH}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${USER_FILES_PATH}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" - rm ${user_files_path}/${FILE_PREFIX}_archive{1..3}.$1 + rm ${USER_FILES_PATH}/${FILE_PREFIX}_archive{1..3}.$1 rm ${FILE_PREFIX}_data{0..3}.csv } diff --git a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh index 17d1fa92377..3e629ece33f 100755 --- a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh +++ b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-ordinary-database, zookeeper, no-fasttest, no-parallel +# Tags: no-ordinary-database, zookeeper, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -37,7 +37,7 @@ function create_drop_loop() export -f create_drop_loop; THREADS=10 -TIMEOUT=30 +TIMEOUT=20 for i in `seq $THREADS` do diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 374dd246c96..2d0ff256c95 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -5,12 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# see 01658_read_file_to_stringcolumn.sh -CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - # Prepare data unique_name=${CLICKHOUSE_TEST_UNIQUE_NAME} -user_files_tmp_dir=${CLICKHOUSE_USER_FILES_PATH}/${unique_name} +user_files_tmp_dir=${USER_FILES_PATH}/${unique_name} mkdir -p ${user_files_tmp_dir}/tmp/ echo '"id","str","int","text"' > ${user_files_tmp_dir}/tmp.csv echo '1,"abc",123,"abacaba"' >> ${user_files_tmp_dir}/tmp.csv diff --git a/tests/queries/0_stateless/02724_decompress_filename_exception.sh b/tests/queries/0_stateless/02724_decompress_filename_exception.sh index e413910b934..8b5a2f23aa9 100755 --- a/tests/queries/0_stateless/02724_decompress_filename_exception.sh +++ b/tests/queries/0_stateless/02724_decompress_filename_exception.sh @@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILENAME="${USER_FILES_PATH}/corrupted_file.tsv.xx" echo 'corrupted file' > $FILENAME; diff --git a/tests/queries/0_stateless/02732_rename_after_processing.sh b/tests/queries/0_stateless/02732_rename_after_processing.sh index 9d44ff9fc34..c3f2274570e 100755 --- a/tests/queries/0_stateless/02732_rename_after_processing.sh +++ b/tests/queries/0_stateless/02732_rename_after_processing.sh @@ -4,12 +4,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# see 01658_read_file_to_stringcolumn.sh -CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - # Prepare data unique_name=${CLICKHOUSE_TEST_UNIQUE_NAME} -tmp_dir=${CLICKHOUSE_USER_FILES_PATH}/${unique_name} +tmp_dir=${USER_FILES_PATH}/${unique_name} mkdir -p $tmp_dir rm -rf ${tmp_dir:?}/* diff --git a/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh b/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh index 932837b83db..60df3ed2762 100755 --- a/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh +++ b/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh @@ -7,27 +7,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by using the file function, we can also get it by this query: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +rm -rf ${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta/non_existing.csv', CSV);" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo 'OK' || echo 'FAIL' +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta/non_existing.csv', CSV);" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo 'OK' || echo 'FAIL' # Create two files in different directories -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/ -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/ -echo 'This is file data1' > ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/data1.csv -echo 'This is file data2' > ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/data2.csv +echo 'This is file data1' > ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/data1.csv +echo 'This is file data2' > ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/data2.csv -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV);" -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta2.csv', CSV);" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV);" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta2.csv', CSV);" -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data1.csv';" -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data2.csv';" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data1.csv';" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data2.csv';" -rm -rf ${user_files_path:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh index 2b9e5296a05..20b02bcba32 100755 --- a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh @@ -14,14 +14,11 @@ function run_test_for_disk() echo "$disk" - clickhouse-disks -C "$config" --disk "$disk" write --input "$config" $CLICKHOUSE_DATABASE/test - clickhouse-disks -C "$config" --log-level test --disk "$disk" copy $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + clickhouse-disks -C "$config" --disk "$disk" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" + clickhouse-disks -C "$config" --log-level test --disk "$disk" --query "copy -r $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." } - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test - # NOTE: this is due to "copy" does works like "cp -R from to/" instead of "cp from to" - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy/test - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy + clickhouse-disks -C "$config" --disk "$disk" --query "remove -r $CLICKHOUSE_DATABASE/test" } function run_test_copy_from_s3_to_s3(){ @@ -29,13 +26,12 @@ function run_test_copy_from_s3_to_s3(){ local disk_dest=$1 && shift echo "copy from $disk_src to $disk_dest" - clickhouse-disks -C "$config" --disk "$disk_src" write --input "$config" $CLICKHOUSE_DATABASE/test + clickhouse-disks -C "$config" --disk "$disk_src" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" - clickhouse-disks -C "$config" --log-level test copy --disk-from "$disk_src" --disk-to "$disk_dest" $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + clickhouse-disks -C "$config" --log-level test --query "copy -r --disk-from $disk_src --disk-to $disk_dest $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." } - clickhouse-disks -C "$config" --disk "$disk_dest" remove $CLICKHOUSE_DATABASE/test.copy/test - clickhouse-disks -C "$config" --disk "$disk_dest" remove $CLICKHOUSE_DATABASE/test.copy + clickhouse-disks -C "$config" --disk "$disk_dest" --query "remove -r $CLICKHOUSE_DATABASE/test.copy" } run_test_for_disk s3_plain_native_copy diff --git a/tests/queries/0_stateless/02834_apache_arrow_abort.sql b/tests/queries/0_stateless/02834_apache_arrow_abort.sql index 47db46f1e43..bd29e95db9a 100644 --- a/tests/queries/0_stateless/02834_apache_arrow_abort.sql +++ b/tests/queries/0_stateless/02834_apache_arrow_abort.sql @@ -1,4 +1,4 @@ -- Tags: no-fasttest -- This tests depends on internet access, but it does not matter, because it only has to check that there is no abort due to a bug in Apache Arrow library. -INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } +INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.reference b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference new file mode 100644 index 00000000000..daaac9e3030 --- /dev/null +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference @@ -0,0 +1,2 @@ +42 +42 diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.sql b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql new file mode 100644 index 00000000000..274940f50a3 --- /dev/null +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 ( + `id` UInt64 +) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t1(id) VALUES (42); + +CREATE TABLE t2 ( + `conversation` UInt64, + CONSTRAINT constraint_conversation CHECK conversation IN (SELECT id FROM t1) +) +ENGINE = MergeTree ORDER BY conversation; + +INSERT INTO t2(conversation) VALUES (42); + +select * from t2; + +drop table t1; + +INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE } + +drop table t2; + +CREATE TABLE t2 ( + `conversation` UInt64, + CONSTRAINT constraint_conversation CHECK conversation IN (SELECT id FROM t1) +) +ENGINE = MergeTree ORDER BY conversation; + +INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE } + +CREATE TABLE t1 ( + `id` UInt64 +) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t1(id) VALUES (42); + +INSERT INTO t2(conversation) VALUES (42); +select * from t2; diff --git a/tests/queries/0_stateless/02864_replace_regexp_string_fallback.reference b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.reference new file mode 100644 index 00000000000..dd52d49eea3 --- /dev/null +++ b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.reference @@ -0,0 +1 @@ +Hello l x Hexlo Hexxo diff --git a/tests/queries/0_stateless/02864_replace_regexp_string_fallback.sql b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.sql new file mode 100644 index 00000000000..917c11fe8dd --- /dev/null +++ b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.sql @@ -0,0 +1,11 @@ +-- Tests functions replaceRegexpAll and replaceRegexpOne with trivial patterns. These trigger internally a fallback to simple string replacement. + +-- _materialize_ because the shortcut is only implemented for non-const haystack + const needle + const replacement strings + +SELECT 'Hello' AS haystack, 'l' AS needle, 'x' AS replacement, replaceRegexpOne(materialize(haystack), needle, replacement), replaceRegexpAll(materialize(haystack), needle, replacement); + +-- negative tests + +-- Even if the fallback is used, invalid substitutions must throw an exception. +SELECT 'Hello' AS haystack, 'l' AS needle, '\\1' AS replacement, replaceRegexpOne(materialize(haystack), needle, replacement); -- { serverError BAD_ARGUMENTS } +SELECT 'Hello' AS haystack, 'l' AS needle, '\\1' AS replacement, replaceRegexpAll(materialize(haystack), needle, replacement); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02889_file_log_save_errors.sh b/tests/queries/0_stateless/02889_file_log_save_errors.sh index 8ef7816d57d..cf7ced0bd08 100755 --- a/tests/queries/0_stateless/02889_file_log_save_errors.sh +++ b/tests/queries/0_stateless/02889_file_log_save_errors.sh @@ -4,27 +4,25 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" ${CLICKHOUSE_CLIENT} --query "drop table if exists log_errors;" -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {0..9} do - echo "{\"key\" : $i, \"value\" : $i}" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl - echo "Error $i" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl + echo "{\"key\" : $i, \"value\" : $i}" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl + echo "Error $i" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl done for i in {10..19} do - echo "{\"key\" : $i, \"value\" : $i}" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl - echo "Error $i" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl + echo "{\"key\" : $i, \"value\" : $i}" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl + echo "Error $i" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl done -${CLICKHOUSE_CLIENT} --query "create table file_log(key UInt8, value UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'JSONEachRow') settings handle_error_mode='stream';" +${CLICKHOUSE_CLIENT} --query "create table file_log(key UInt8, value UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'JSONEachRow') settings handle_error_mode='stream';" ${CLICKHOUSE_CLIENT} --query "create Materialized View log_errors engine=MergeTree order by tuple() as select _error as error, _raw_record as record, _filename as file from file_log where not isNull(_error);" function count() @@ -42,4 +40,4 @@ ${CLICKHOUSE_CLIENT} --query "select * from log_errors order by file, record;" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" ${CLICKHOUSE_CLIENT} --query "drop table log_errors;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.reference b/tests/queries/0_stateless/02890_named_tuple_functions.reference new file mode 100644 index 00000000000..f7a0c440b5a --- /dev/null +++ b/tests/queries/0_stateless/02890_named_tuple_functions.reference @@ -0,0 +1,9 @@ +Tuple(\n i Int32,\n j Int32) +['i','j'] +Tuple(UInt8, Int32) +['1','2'] +Tuple(\n k UInt8,\n j Int32) +['k','j'] +Tuple(Int32, Int32, Int32, Int32) +['1','2','3','4'] +(1,2,3) diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.sql b/tests/queries/0_stateless/02890_named_tuple_functions.sql new file mode 100644 index 00000000000..8e0c9c2b10e --- /dev/null +++ b/tests/queries/0_stateless/02890_named_tuple_functions.sql @@ -0,0 +1,31 @@ +set enable_named_columns_in_function_tuple = 1; +set allow_experimental_analyzer = 1; + +drop table if exists x; +create table x (i int, j int) engine MergeTree order by i; +insert into x values (1, 2); + +select toTypeName(tuple(i, j)) from x; +select tupleNames(tuple(i, j)) from x; + +select toTypeName(tuple(1, j)) from x; +select tupleNames(tuple(1, j)) from x; + +select toTypeName(tuple(1 as k, j)) from x; +select tupleNames(tuple(1 as k, j)) from x; + +select toTypeName(tuple(i, i, j, j)) from x; +select tupleNames(tuple(i, i, j, j)) from x; + +select tupleNames(1); -- { serverError 43 } + +drop table x; + +drop table if exists tbl; + +-- Make sure named tuple won't break Values insert +create table tbl (x Tuple(a Int32, b Int32, c Int32)) engine MergeTree order by (); +insert into tbl values (tuple(1, 2, 3)); -- without tuple it's interpreted differently inside values block. +select * from tbl; + +drop table tbl diff --git a/tests/queries/0_stateless/02890_untuple_column_names.reference b/tests/queries/0_stateless/02890_untuple_column_names.reference index 388f974c45f..13a85c70138 100644 --- a/tests/queries/0_stateless/02890_untuple_column_names.reference +++ b/tests/queries/0_stateless/02890_untuple_column_names.reference @@ -57,6 +57,10 @@ t.1: 1 Row 1: ────── t.1: 1 +-- tuple() with enable_named_columns_in_function_tuple = 1 and allow_experimental_analyzer = 1 keeps the column names +Row 1: +────── +t.a: 1 -- thankfully JSONExtract() keeps them Row 1: ────── diff --git a/tests/queries/0_stateless/02890_untuple_column_names.sql b/tests/queries/0_stateless/02890_untuple_column_names.sql index ab6748cb54d..cd490ca3522 100644 --- a/tests/queries/0_stateless/02890_untuple_column_names.sql +++ b/tests/queries/0_stateless/02890_untuple_column_names.sql @@ -37,8 +37,11 @@ SELECT untuple(tuple(1)::Tuple(Int)), untuple(tuple(1)::Tuple(Int)) FORMAT Verti SELECT untuple(tuple(1)::Tuple(Int)), untuple(tuple(1)::Tuple(Int)) FORMAT Vertical SETTINGS allow_experimental_analyzer = 1; -- Bug: doesn't throw an exception SELECT '-- tuple() loses the column names (would be good to fix, see #36773)'; -SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 0; -SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 0, enable_named_columns_in_function_tuple = 0; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1, enable_named_columns_in_function_tuple = 0; + +SELECT '-- tuple() with enable_named_columns_in_function_tuple = 1 and allow_experimental_analyzer = 1 keeps the column names'; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1, enable_named_columns_in_function_tuple = 1; SELECT '-- thankfully JSONExtract() keeps them'; SELECT untuple(JSONExtract('{"key": "value"}', 'Tuple(key String)')) x FORMAT Vertical SETTINGS allow_experimental_analyzer = 0; diff --git a/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh b/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh index 42dde18de00..9f93396e368 100755 --- a/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh +++ b/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh @@ -6,11 +6,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') - cp "$CURDIR"/data_csv/1m_rows_cr_end_of_line.csv.xz $USER_FILES_PATH/ $CLICKHOUSE_CLIENT -q "SELECT count(1) from file('1m_rows_cr_end_of_line.csv.xz') settings input_format_csv_allow_cr_end_of_line=1, optimize_count_from_files=1" $CLICKHOUSE_CLIENT -q "SELECT count(1) from file('1m_rows_cr_end_of_line.csv.xz') settings input_format_csv_allow_cr_end_of_line=1, optimize_count_from_files=0" -rm $USER_FILES_PATH/1m_rows_cr_end_of_line.csv.xz \ No newline at end of file +rm $USER_FILES_PATH/1m_rows_cr_end_of_line.csv.xz diff --git a/tests/queries/0_stateless/02895_npy_output_format.sh b/tests/queries/0_stateless/02895_npy_output_format.sh index 934c80830c5..a364e447062 100755 --- a/tests/queries/0_stateless/02895_npy_output_format.sh +++ b/tests/queries/0_stateless/02895_npy_output_format.sh @@ -5,10 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY -q "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -chmod 777 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +chmod 777 ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " DROP DATABASE IF EXISTS npy_output_02895; @@ -33,43 +32,43 @@ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " INSERT INTO npy_output_02895.data_types VALUES (1, 1, 1, 1, 1, 1, 1, 1, 0.1, 0.01, 'npy', 'npy'), (-1, -1, -1, -1, 0, 0, 0, 0, 0.2, 0.02, 'npy', 'npynpy'); - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy') SELECT i1 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy') SELECT i2 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy') SELECT i4 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy') SELECT i8 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy') SELECT u1 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy') SELECT u2 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy') SELECT u4 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy') SELECT u8 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy') SELECT f4 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy') SELECT f8 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy') SELECT fs FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy') SELECT s FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy') SELECT i1 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy') SELECT i2 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy') SELECT i4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy') SELECT i8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy') SELECT u1 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy') SELECT u2 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy') SELECT u4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy') SELECT u8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy') SELECT f4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy') SELECT f8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy') SELECT fs FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy') SELECT s FROM npy_output_02895.data_types; - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); SELECT '-- test nested data types --'; CREATE TABLE IF NOT EXISTS npy_output_02895.nested_data_types @@ -81,16 +80,16 @@ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " INSERT INTO npy_output_02895.nested_data_types VALUES ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], ['a', 'bb']), ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], ['ccc', 'dddd']); - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy') SELECT i4 FROM npy_output_02895.nested_data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy') SELECT f8 FROM npy_output_02895.nested_data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy') SELECT s FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy') SELECT i4 FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy') SELECT f8 FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy') SELECT s FROM npy_output_02895.nested_data_types; - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); SELECT '-- test exceptions --'; CREATE TABLE IF NOT EXISTS npy_output_02895.exceptions @@ -115,4 +114,4 @@ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " DROP DATABASE IF EXISTS npy_output_02895;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 5b6c14ca24f..4eb7e74446d 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -3,7 +3,7 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) O 2 2 2 2 3 3 3 33 \N \N \N \N -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t2.x IS NULL) OR t1.y <=> t2.y ORDER BY t1.x NULLS LAST; 1 42 4 42 2 2 2 2 3 3 3 33 @@ -12,14 +12,14 @@ SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER 2 2 2 2 3 3 3 33 \N \N \N \N -SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND ((t1.x = t1.y) OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; 2 2 2 2 3 3 3 33 \N \N \N \N SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; 2 2 2 2 \N \N \N \N -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t2.x IS NULL) OR (t1.y IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; 1 42 4 42 2 2 2 2 3 3 3 33 @@ -31,3 +31,21 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; 1 1 1 +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +2 2 2 2 +3 3 3 33 +\N \N \N \N +-- aliases defined in the join condition are valid +-- FIXME(@vdimir) broken query formatting for the following queries: +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; + +-- check for non-nullable columns for which `is null` is replaced with constant +SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +2 2 2 2 +3 3 3 33 +-- +0 +0 +2 +2 diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 5458370db8c..f7813e2a1b4 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t1n; +DROP TABLE IF EXISTS t2n; CREATE TABLE t1 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; CREATE TABLE t2 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; @@ -7,24 +9,63 @@ CREATE TABLE t2 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; INSERT INTO t1 VALUES (1,42), (2,2), (3,3), (NULL,NULL); INSERT INTO t2 VALUES (NULL,NULL), (2,2), (3,33), (4,42); +CREATE TABLE t1n (x Int64, y UInt64) ENGINE = TinyLog; +CREATE TABLE t2n (x Int64, y UInt64) ENGINE = TinyLog; + +INSERT INTO t1n VALUES (1,42), (2,2), (3,3); +INSERT INTO t2n VALUES (2,2), (3,33), (4,42); + SET allow_experimental_analyzer = 1; -- { echoOn } SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) ORDER BY t1.x NULLS LAST; -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t2.x IS NULL) OR t1.y <=> t2.y ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x; -SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND ((t1.x = t1.y) OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t2.x IS NULL) OR (t1.y IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) AND (t1.y == t2.y OR (t1.y IS NULL AND t2.y IS NULL)) AND COALESCE(t1.x, 0) != 2 ORDER BY t1.x NULLS LAST; SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; + +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; + +-- aliases defined in the join condition are valid +-- FIXME(@vdimir) broken query formatting for the following queries: +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; + +-- check for non-nullable columns for which `is null` is replaced with constant +SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; + -- { echoOff } +SELECT '--'; + +-- IS NOT NULL and constants are optimized out +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON ( (t1.x = t2.x) AND (t1.x IS NOT NULL) AND true AND (t2.x IS NOT NULL) ) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON ( (t1.x = t2.x) AND true ) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + +-- this is not optimized out +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t1n; +DROP TABLE IF EXISTS t2n; diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index 07deb7c2565..335b55f05c8 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -10,14 +10,13 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 84 -2023-09-01 12:00:00 42 +2022-09-01 12:00:00 42 -- Original issue with deduplicate_blocks_in_dependent_materialized_views = 1 AND max_insert_delayed_streams_for_parallel_write > 1 -- Landing 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 84 +2022-09-01 12:00:00 42 2023-09-01 12:00:00 42 -- Regression introduced in https://github.com/ClickHouse/ClickHouse/pull/54184 -- Landing (Agg/Replacing)MergeTree diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index a2378fd8f67..f206f0d7775 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -54,9 +54,8 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 1st insert works for landing and mv tables - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded, now that block is inserted because deduplicate_blocks_in_dependent_materialized_views=0 + - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded - Now it is fixed. */ SET deduplicate_blocks_in_dependent_materialized_views = 0, max_insert_delayed_streams_for_parallel_write = 1000; @@ -98,7 +97,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view This is what happens now: - 1st insert works for landing and mv tables - - 2nd insert gets first block 20220901 deduplicated for landing and both rows are inserted for mv tables + - 2nd insert gets first block 20220901 deduplicated and second one inserted for landing and mv tables */ SET deduplicate_blocks_in_dependent_materialized_views = 1, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/02931_file_cluster.sh b/tests/queries/0_stateless/02931_file_cluster.sh index 8566e2ab08e..ebd3792e1dc 100755 --- a/tests/queries/0_stateless/02931_file_cluster.sh +++ b/tests/queries/0_stateless/02931_file_cluster.sh @@ -4,8 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - mkdir -p "${USER_FILES_PATH}"/"${CLICKHOUSE_TEST_UNIQUE_NAME}"/ for i in {1..10} diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh index 76ada756f47..2e5a538007c 100755 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) disk_name="s3_cache_02933" $CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" -config_path=/etc/clickhouse-server/config.d/storage_conf.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf.xml config_path_tmp=$config_path.tmp cat $config_path \ diff --git a/tests/queries/0_stateless/02941_variant_type_1.reference b/tests/queries/0_stateless/02941_variant_type_1.reference index 8a6e77d4f6d..53e5a556821 100644 --- a/tests/queries/0_stateless/02941_variant_type_1.reference +++ b/tests/queries/0_stateless/02941_variant_type_1.reference @@ -91,42 +91,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -145,21 +145,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -256,42 +256,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -310,23 +310,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -421,42 +421,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -475,23 +475,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- MergeTree compact @@ -587,42 +587,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -641,21 +641,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -751,42 +751,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -805,21 +805,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -916,42 +916,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -970,23 +970,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test2 select @@ -1080,42 +1080,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1134,23 +1134,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -1245,42 +1245,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -1299,23 +1299,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- test3 select @@ -1409,42 +1409,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -1463,23 +1463,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- MergeTree wide @@ -1575,42 +1575,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1629,21 +1629,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -1739,42 +1739,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1793,21 +1793,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -1904,42 +1904,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1958,23 +1958,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test2 select @@ -2068,42 +2068,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -2122,23 +2122,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -2233,42 +2233,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -2287,23 +2287,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- test3 select @@ -2397,42 +2397,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -2451,22 +2451,22 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_2.reference b/tests/queries/0_stateless/02941_variant_type_2.reference index 20a5176cb5e..1d9126aa230 100644 --- a/tests/queries/0_stateless/02941_variant_type_2.reference +++ b/tests/queries/0_stateless/02941_variant_type_2.reference @@ -6,9 +6,6 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 MergeTree compact test4 insert test4 select @@ -17,18 +14,12 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 test4 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 MergeTree wide test4 insert test4 select @@ -37,15 +28,9 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 test4 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index f43cd2bb0d6..8453bce98dc 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -33,13 +33,10 @@ select v.\`LowCardinality(String)\` from test format Null; select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); -select v.\`Array(UInt64)\`.size0 from test format Null; -select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +select v.\`Array(UInt64)\`.size0 from test format Null;" } function run() diff --git a/tests/queries/0_stateless/02941_variant_type_3.reference b/tests/queries/0_stateless/02941_variant_type_3.reference index 1ccdb3acdff..d28aa7a594b 100644 --- a/tests/queries/0_stateless/02941_variant_type_3.reference +++ b/tests/queries/0_stateless/02941_variant_type_3.reference @@ -6,9 +6,6 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 MergeTree compact test5 insert test5 select @@ -17,18 +14,12 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 test5 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 MergeTree wide test5 insert test5 select @@ -37,15 +28,9 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 test5 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index f4b2b304f56..990eb25b5be 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -35,13 +35,10 @@ select v.\`LowCardinality(String)\` from test format Null; select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); -select v.\`Array(UInt64)\`.size0 from test format Null; -select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +select v.\`Array(UInt64)\`.size0 from test format Null;" } function run() diff --git a/tests/queries/0_stateless/02941_variant_type_4.reference b/tests/queries/0_stateless/02941_variant_type_4.reference index e13d5820343..d1630b04347 100644 --- a/tests/queries/0_stateless/02941_variant_type_4.reference +++ b/tests/queries/0_stateless/02941_variant_type_4.reference @@ -6,9 +6,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- MergeTree compact test6 insert @@ -18,9 +15,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- test6 select 1000000 @@ -28,9 +22,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- MergeTree wide test6 insert @@ -40,9 +31,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- test6 select 1000000 @@ -50,7 +38,4 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index f9a16847864..b8f619694b0 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -29,13 +29,10 @@ function test6_select() select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; - select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; - select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); - select v.\`Array(UInt64)\`.size0 from test format Null; - select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" + select v.\`Array(UInt64)\`.size0 from test format Null;" echo "-----------------------------------------------------------------------------------------------------------" } diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh index 6f454da40da..cb099bb59ae 100755 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -24,7 +24,7 @@ $CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" $CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" -config_path=/etc/clickhouse-server/config.d/storage_conf_02944.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf_02944.xml config_path_tmp=$config_path.tmp echo 'set max_size from 100 to 10' diff --git a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh index a02bdd0a1d2..3d2fe5d664d 100755 --- a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh +++ b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP DATABASE IF EXISTS 02950_database_for_ssd_cache_dictionary; CREATE DATABASE 02950_database_for_ssd_cache_dictionary; @@ -32,7 +30,7 @@ $CLICKHOUSE_CLIENT -n --query=" PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'source_table')) LIFETIME(MIN 1 MAX 1000) - LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 8192 PATH '$USER_FILES_PATH/0d')); + LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 8192 PATH '$CLICKHOUSE_USER_FILES/0d')); SELECT dictGetOrDefault('02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary', ('v1', 'v2'), 0, (intDiv(1, id), intDiv(1, id))) FROM 02950_database_for_ssd_cache_dictionary.source_table; SELECT dictGetOrDefault('02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary', 'v2', id+1, intDiv(NULL, id)) FROM 02950_database_for_ssd_cache_dictionary.source_table; diff --git a/tests/queries/0_stateless/02961_storage_config_volume_priority.sh b/tests/queries/0_stateless/02961_storage_config_volume_priority.sh index 4e085541a8d..145b921a750 100755 --- a/tests/queries/0_stateless/02961_storage_config_volume_priority.sh +++ b/tests/queries/0_stateless/02961_storage_config_volume_priority.sh @@ -15,7 +15,7 @@ WHERE policy_name = 'policy_02961' ORDER BY volume_priority ASC; " -config_path=/etc/clickhouse-server/config.d/storage_conf_02961.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf_02961.xml config_path_tmp=$config_path.tmp echo 'check non-unique values dont work' diff --git a/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh index b61be87411d..9ef271632d0 100755 --- a/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh +++ b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh @@ -62,7 +62,7 @@ export -f sync_and_drop_replicas export -f optimize_thread export -f mutations_thread -TIMEOUT=60 +TIMEOUT=30 timeout $TIMEOUT bash -c insert_thread 2> /dev/null & timeout $TIMEOUT bash -c sync_and_drop_replicas 2> /dev/null & diff --git a/tests/queries/0_stateless/02968_file_log_multiple_read.sh b/tests/queries/0_stateless/02968_file_log_multiple_read.sh index 199893a9428..d9bae05270a 100755 --- a/tests/queries/0_stateless/02968_file_log_multiple_read.sh +++ b/tests/queries/0_stateless/02968_file_log_multiple_read.sh @@ -4,12 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -logs_dir=${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME} +logs_dir=${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME} rm -rf ${logs_dir} diff --git a/tests/queries/0_stateless/02971_analyzer_remote_id.sh b/tests/queries/0_stateless/02971_analyzer_remote_id.sh index 463e4cc1f0c..ab3c5292529 100755 --- a/tests/queries/0_stateless/02971_analyzer_remote_id.sh +++ b/tests/queries/0_stateless/02971_analyzer_remote_id.sh @@ -1,15 +1,9 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_02971" -${CLICKHOUSE_CLIENT} --query="CREATE DATABASE test_02971" - -${CLICKHOUSE_CLIENT} --query="CREATE TABLE test_02971.x ENGINE = MergeTree() ORDER BY number AS SELECT * FROM numbers(2)" -${CLICKHOUSE_LOCAL} --query="SELECT count() FROM remote('127.0.0.{2,3}', 'test_02971.x') SETTINGS allow_experimental_analyzer = 1" 2>&1 \ +${CLICKHOUSE_CLIENT} --query="CREATE TABLE ${CLICKHOUSE_DATABASE}.x ENGINE = MergeTree() ORDER BY number AS SELECT * FROM numbers(2)" +${CLICKHOUSE_LOCAL} --query="SELECT count() FROM remote('127.0.0.{2,3}', '${CLICKHOUSE_DATABASE}.x') SETTINGS allow_experimental_analyzer = 1" 2>&1 \ | grep -av "ASan doesn't fully support makecontext/swapcontext functions" - -${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_02971" diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index 14f28f1ba4a..6edac86be5b 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -4,8 +4,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Data preparation step -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') UNIX_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" DOS_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" DATA_FILE_UNIX_ENDINGS="${USER_FILES_PATH:?}/${UNIX_ENDINGS}" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference index 531163e1d84..3135f2d01e1 100644 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference @@ -3,28 +3,28 @@ data after ATTACH 1 Files before DETACH TABLE all_1_1_0 -backups/ordinary_default/data/ordinary_default/data/all_1_1_0: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json Files after DETACH TABLE all_1_1_0 -backups/ordinary_default/data/ordinary_default/data/all_1_1_0: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh index 12d08159012..d543f7195a9 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh @@ -49,11 +49,11 @@ path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM path=${path%/} echo "Files before DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 $CLICKHOUSE_CLIENT -q "detach table data" echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 # metadata file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference index 1e191b719a5..a2dd196083e 100644 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference @@ -3,28 +3,28 @@ data after ATTACH 1 Files before DETACH TABLE all_X_X_X -backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json Files after DETACH TABLE all_X_X_X -backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh index b079e67a000..eec05c81344 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh @@ -55,14 +55,14 @@ path=${path%/} echo "Files before DETACH TABLE" # sed to match any part, since in case of fault injection part name may not be all_0_0_0 but all_1_1_0 -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' $CLICKHOUSE_CLIENT -nm -q " detach table data_read; detach table data_write; " echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' # metadata file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02984_form_format.sh b/tests/queries/0_stateless/02984_form_format.sh index ce5feb60130..471b48e0f68 100755 --- a/tests/queries/0_stateless/02984_form_format.sh +++ b/tests/queries/0_stateless/02984_form_format.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Test setup -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME="data.tmp" FORM_DATA="${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/${FILE_NAME}" mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ @@ -30,4 +28,4 @@ touch $FORM_DATA echo -ne "c.e=ls7xfkpm&c.tti.m=raf&rt.start=navigation&rt.bmr=390%2C11%2C10&rt.tstart=1707076768666&rt.bstart=1707076769091&rt.blstart=1707076769056&rt.end=1707076769078&t_resp=296&t_page=116&t_done=412&t_other=boomerang%7C6%2Cboomr_fb%7C425%2Cboomr_ld%7C390%2Cboomr_lat%7C35&rt.tt=2685&rt.obo=0&pt.fcp=407&nt_nav_st=1707076768666&nt_dns_st=1707076768683&nt_dns_end=1707076768684&nt_con_st=1707076768684&nt_con_end=1707076768850&nt_req_st=1707076768850&nt_res_st=1707076768962&nt_res_end=1707076768962&nt_domloading=1707076769040&nt_domint=1707076769066&nt_domcontloaded_st=1707076769067&nt_domcontloaded_end=1707076769068&nt_domcomp=1707076769069&nt_load_st=1707076769069&nt_load_end=1707076769078&nt_unload_st=1707076769040&nt_unload_end=1707076769041&nt_ssl_st=1707076768788&nt_enc_size=3209&nt_dec_size=10093&nt_trn_size=3940&nt_protocol=h2&nt_red_cnt=0&nt_nav_type=1&restiming=%7B%22https%3A%2F%2Fwww.basicrum.com%2F%22%3A%7B%22publications%2F%22%3A%226%2C88%2C88%2C54%2C54%2C3e%2Ci%2Ci%2Ch*12h5%2Ckb%2C5b8%22%2C%22assets%2Fjs%2F%22%3A%7B%22just-the-docs.js%22%3A%223am%2Ce%2Ce*12pc%2C_%2C8oj*20%22%2C%22boomerang-1.737.60.cutting-edge.min.js%22%3A%222au%2Cb%2Ca*1pu3%2C_%2C1m19*21*42%22%2C%22vendor%2Flunr.min.js%22%3A%223am%2Cd%2C8*16t2%2C_%2Cfym*20%22%7D%7D%7D&u=https%3A%2F%2Fwww.basicrum.com%2Fpublications%2F&r=https%3A%2F%2Fwww.basicrum.com%2Fcost-analyses%2F&v=1.737.60&sv=14&sm=p&rt.si=dd0c542f-7adf-4310-830a-6c0a3d157c90-s8cjr1&rt.ss=1707075325294&rt.sl=4&vis.st=visible&ua.plt=Linux%20x86_64&ua.vnd=&pid=8fftz949&n=1&c.t.fps=07*4*65*j*61&c.t.busy=2*4*0034&c.tti.vr=408&c.tti=408&c.b=2&c.f=60&c.f.d=2511&c.f.m=1&c.f.s=ls7xfl1h&dom.res=5&dom.doms=1&mem.lsln=0&mem.ssln=0&mem.lssz=2&mem.sssz=2&scr.xy=1920x1200&scr.bpp=24%2F24&scr.orn=0%2Flandscape-primary&cpu.cnc=16&dom.ln=114&dom.sz=10438&dom.ck=157&dom.img=0&dom.script=6&dom.script.ext=3&dom.iframe=0&dom.link=4&dom.link.css=1&sb=1" > $FORM_DATA $CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FORM_DATA', Form) FORMAT Vertical" -rm $FORM_DATA \ No newline at end of file +rm $FORM_DATA diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.reference b/tests/queries/0_stateless/02993_lazy_index_loading.reference index 5bc329ae4eb..08f07a92815 100644 --- a/tests/queries/0_stateless/02993_lazy_index_loading.reference +++ b/tests/queries/0_stateless/02993_lazy_index_loading.reference @@ -1,4 +1,4 @@ -100000000 140000000 +100000000 100000000 0 0 1 100000000 100000000 diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python deleted file mode 100644 index dd1058518c9..00000000000 --- a/tests/queries/0_stateless/03008_deduplication.python +++ /dev/null @@ -1,657 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import argparse -import string - - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - - -def __format(template, **params): - field_names = [v[1] for v in string.Formatter().parse(template) if v[1] is not None] - kv_args = {} - for field in field_names: - if field in params: - kv_args[field] = params[field] - else: - kv_args[field] = "" - - return template.format(**kv_args) - - -def instance_create_statement( - table_name, - table_columns, - table_keys, - table_engine, - with_deduplication, - no_merges=True, -): - template = """ - CREATE TABLE {table_name} - {table_columns} - ENGINE = {table_engine} - ORDER BY {table_keys} - {table_settings}; - {table_no_merges} - """ - - params = dict() - params["table_name"] = table_name - params["table_columns"] = table_columns - params["table_keys"] = table_keys - params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" - params["table_engine"] = ( - "MergeTree()" - if table_engine == "MergeTree" - else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" - ) - - deduplication_window_setting_name = ( - "non_replicated_deduplication_window" - if table_engine == "MergeTree" - else "replicated_deduplication_window" - ) - deduplication_window_setting_value = 1000 if with_deduplication else 0 - - settings = list() - settings += [ - f"{deduplication_window_setting_name}={deduplication_window_setting_value}" - ] - params["table_settings"] = "SETTINGS " + ",".join(settings) - - return __format(template, **params) - - -def instance_insert_statement( - table_name, count, insert_method, insert_unique_blocks, use_insert_token -): - insert_settings = ( - "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" - ) - - if insert_method == "InsertSelect": - template = """ - INSERT INTO {table_name} - SELECT {insert_columns} - FROM numbers({count}) {insert_settings}; - """ - return __format( - template, - table_name=table_name, - count=count, - insert_columns="'src_4', 4" - if not insert_unique_blocks - else "'src_' || toString(number), number", - insert_settings=insert_settings, - ) - - else: - template = """ - INSERT INTO {table_name} - {insert_settings} VALUES {insert_values}; - """ - - values = [] - for i in range(count): - values += ( - [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] - ) - insert_values = ", ".join(values) - - return __format( - template, - table_name=table_name, - insert_settings=insert_settings, - insert_values=insert_values, - ) - - -def get_drop_tables_statements(tables): - return "".join( - [f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]] - ) - - -def get_logs_statement(args): - if args.get_logs: - return "SET send_logs_level='test';" - return "" - - -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() in ("yes", "true", "t", "y", "1"): - return True - elif v.lower() in ("no", "false", "f", "n", "0"): - return False - else: - raise argparse.ArgumentTypeError("Boolean value expected.") - - -class ArgsFactory: - def __init__(self, parser): - self.__parser = parser - - def add_opt_engine(self): - self.__parser.add_argument( - "--table-engine", - choices=["ReplicatedMergeTree", "MergeTree"], - default="MergeTree", - ) - - def add_opt_user_token(self): - self.__parser.add_argument( - "--use-insert-token", type=str2bool, nargs="?", const=True, default=False - ) - - def add_opt_single_thread(self): - self.__parser.add_argument( - "--single-thread", type=str2bool, nargs="?", const=True, default=True - ) - - def add_opt_dedup_src(self): - self.__parser.add_argument( - "--deduplicate-src-table", - type=str2bool, - nargs="?", - const=True, - default=True, - ) - - def add_opt_dedup_dst(self): - self.__parser.add_argument( - "--deduplicate-dst-table", - type=str2bool, - nargs="?", - const=True, - default=True, - ) - - def add_opt_get_logs(self): - self.__parser.add_argument( - "--get-logs", type=str2bool, nargs="?", const=True, default=False - ) - - def add_opt_uniq_blocks(self): - self.__parser.add_argument( - "--insert-unique-blocks", type=str2bool, nargs="?", const=True, default=True - ) - - def add_opt_insert_method(self): - self.__parser.add_argument( - "--insert-method", - choices=["InsertSelect", "InsertValues"], - default="InsertSelect", - ) - - def add_all(self): - self.add_opt_engine() - self.add_opt_user_token() - self.add_opt_single_thread() - self.add_opt_dedup_src() - self.add_opt_dedup_dst() - self.add_opt_get_logs() - self.add_opt_insert_method() - self.add_opt_uniq_blocks() - - -def test_insert_several_blocks(parser): - ArgsFactory(parser).add_all() - - def calle(args): - create_table_a_b_statement = instance_create_statement( - table_name="table_a_b", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_when_b_even_statement = instance_create_statement( - table_name="table_when_b_even", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - create_mv_statement = """ - CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even - AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - """ - - drop_tables_statements = get_drop_tables_statements( - ["table_a_b", "table_when_b_even", "mv_b_even"] - ) - - insert_statement = instance_insert_statement( - "table_a_b", - 10, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - print_details_statements = f""" - SELECT 'table_a_b'; - SELECT 'count', count() FROM table_a_b; - {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} - - SELECT 'table_when_b_even'; - SELECT 'count', count() FROM table_when_b_even; - {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 10 ) - FROM table_a_b; - SELECT throwIf( count() != 5 ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {5 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = """ - SELECT throwIf( count() != 10 ) - FROM table_a_b; - SELECT throwIf( count() != 10 ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - {create_table_a_b_statement} - - {create_table_when_b_even_statement} - - {create_mv_statement} - - -- first insert - {insert_statement} - - {print_details_statements} - - {assert_first_insert_statements} - - -- second insert, it is retry - {insert_statement} - - {print_details_statements} - - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def test_mv_generates_several_blocks(parser): - ArgsFactory(parser).add_all() - - def calle(args): - tables = [ - "table_for_join_with", - "table_a_b", - "table_when_b_even_and_joined", - "mv_b_even", - ] - drop_tables_statements = get_drop_tables_statements(tables) - - details_print_for_table_for_join_with = "" - if args.get_logs: - details_print_for_table_for_join_with = """ - SELECT 'table_for_join_with'; - SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; - """ - - create_table_a_b_statement = instance_create_statement( - table_name="table_a_b", - table_columns="(a_src String, b UInt64)", - table_keys="(a_src, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_when_b_even_and_joined_statement = instance_create_statement( - table_name="table_when_b_even_and_joined", - table_columns="(a_src String, a_join String, b UInt64)", - table_keys="(a_src, a_join, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - insert_statement = instance_insert_statement( - "table_a_b", - 5, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - details_print_statements = f""" - SELECT 'table_a_b'; - SELECT 'count', count() FROM table_a_b; - - SELECT 'table_when_b_even_and_joined'; - SELECT 'count', count() FROM table_when_b_even_and_joined; - {"" if not args.get_logs else "SELECT _part, a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 5 ) - FROM table_a_b; - - SELECT throwIf( count() != 9 ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 18} ) - FROM table_when_b_even_and_joined; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) - FROM table_a_b; - - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even_and_joined; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) - FROM table_a_b; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even_and_joined; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - CREATE TABLE table_for_join_with - (a_join String, b UInt64) - ENGINE = MergeTree() - ORDER BY (a_join, b); - INSERT INTO table_for_join_with - SELECT 'joined_' || toString(number), number - FROM numbers(1); - {details_print_for_table_for_join_with} - - {create_table_a_b_statement} - SYSTEM STOP MERGES table_a_b; - - {create_table_when_b_even_and_joined_statement} - SYSTEM STOP MERGES table_when_b_even_and_joined; - - CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even_and_joined - AS - SELECT a_src, a_join, table_for_join_with.b as b - FROM table_a_b - FULL OUTER JOIN table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - - -- first insert - {insert_statement} - - {details_print_statements} - - -- first assertion - {assert_first_insert_statements} - - -- second insert - {insert_statement} - - {details_print_statements} - - -- second assertion - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def test_several_mv_into_one_table(parser): - ArgsFactory(parser).add_all() - - def calle(args): - tables = ["table_src", "table_dst", "mv_b_even", "mv_b_even_even"] - drop_tables_statements = get_drop_tables_statements(tables) - - create_table_src_statement = instance_create_statement( - table_name="table_src", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_dst_statement = instance_create_statement( - table_name="table_dst", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - insert_statement = instance_insert_statement( - "table_src", - 8, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - details_print_statements = f""" - SELECT 'table_src count', count() FROM table_src; - - SELECT 'table_dst count', count() FROM table_dst; - {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 8 ) - FROM table_src; - - SELECT throwIf( count() != 6 ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {6 if args.deduplicate_dst_table else 12} ) - FROM table_dst; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) - FROM table_src; - - SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 32} ) - FROM table_dst; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) - FROM table_src; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 16} ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 32} ) - FROM table_dst; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - {create_table_src_statement} - - {create_table_dst_statement} - - CREATE MATERIALIZED VIEW mv_b_even - TO table_dst - AS - SELECT a, b - FROM table_src - WHERE b % 2 = 0; - - CREATE MATERIALIZED VIEW mv_b_even_even - TO table_dst - AS - SELECT a, b - FROM table_src - WHERE b % 4 = 0; - - -- first insert - {insert_statement} - - {details_print_statements} - - {assert_first_insert_statements} - - -- second insert, retry - {insert_statement} - - {details_print_statements} - - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def parse_args(): - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers(dest="test") - test_insert_several_blocks( - subparsers.add_parser("insert_several_blocks_into_table") - ) - test_mv_generates_several_blocks( - subparsers.add_parser("mv_generates_several_blocks") - ) - test_several_mv_into_one_table(subparsers.add_parser("several_mv_into_one_table")) - args = parser.parse_args() - if args.test is None: - parser.print_help() - return args - - -def main(): - args = parse_args() - if args.test is not None: - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference deleted file mode 100644 index 4893274c1cd..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference +++ /dev/null @@ -1,41 +0,0 @@ -Different materialized view insert into one underlayed table equal data. -first attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -second attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -Different insert operations generate the same data after transformation in underlied table of materialized view. -first attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -second attempt -from dst 1 A all_1_1_0 -from dst 2 A all_2_2_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -Indentical blocks in insertion with `insert_deduplication_token` -first attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -second attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -third attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -Indentical blocks in insertion -from dst 0 A all_1_1_0 -Indentical blocks after materialised view`s transformation -first attempt -from dst 1 B all_1_1_0 -from dst 2 B all_2_2_0 -from mv_dst 0 B all_1_1_0 -from mv_dst 0 B all_2_2_0 -second attempt -from dst 1 B all_1_1_0 -from dst 2 B all_2_2_0 -from mv_dst 0 B all_1_1_0 -from mv_dst 0 B all_2_2_0 diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql deleted file mode 100644 index 7927a6b1edf..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql +++ /dev/null @@ -1,331 +0,0 @@ --- ######### -select 'Different materialized view insert into one underlayed table equal data.'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; -DROP TABLE IF EXISTS mv_first; -DROP TABLE IF EXISTS mv_second; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE TABLE mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_first -TO mv_dst -AS SELECT - 0 AS key, - value AS value -FROM dst; - -CREATE MATERIALIZED VIEW mv_second -TO mv_dst -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_second; -DROP TABLE mv_first; -DROP TABLE mv_dst; -DROP TABLE dst; - - --- ######### -select 'Different insert operations generate the same data after transformation in underlied table of materialized view.'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000 -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst VALUES (2, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_dst; -DROP TABLE dst; - - --- ######### -select 'Indentical blocks in insertion with `insert_deduplication_token`'; - -DROP TABLE IF EXISTS dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -select 'first attempt'; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -select 'third attempt'; - -INSERT INTO dst SELECT - 1 AS key, - 'b' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -DROP TABLE dst; - - --- ######### -select 'Indentical blocks in insertion'; - -DROP TABLE IF EXISTS dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -DROP TABLE dst; - - --- ######### -select 'Indentical blocks after materialised view`s transformation'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000 -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst SELECT - number + 1 AS key, - IF(key = 0, 'A', 'B') AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst SELECT - number + 1 AS key, - IF(key = 0, 'A', 'B') AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_dst; -DROP TABLE dst; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference deleted file mode 100644 index c82a6eaa213..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference +++ /dev/null @@ -1,35 +0,0 @@ -no user deduplication token -partitioned_table is deduplicated bacause deduplication works in scope of one partiotion: -1 A -1 D -2 B -2 C -mv_table is not deduplicated because the inserted blocks was different: -1 A -1 A -1 D -2 B -2 B -2 C -with user deduplication token -partitioned_table is not deduplicated because different tokens: -1 A -1 A -1 D -2 B -2 B -2 C -mv_table is not deduplicated because different tokens: -1 A -1 A -1 D -2 B -2 B -2 C -with incorrect ussage of user deduplication token -partitioned_table is deduplicated because equal tokens: -1 A -2 B -mv_table is deduplicated because equal tokens: -1 A -2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql deleted file mode 100644 index 2eb931f7f73..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql +++ /dev/null @@ -1,83 +0,0 @@ -DROP TABLE IF EXISTS partitioned_table; -DROP TABLE IF EXISTS mv_table; - - -SET deduplicate_blocks_in_dependent_materialized_views = 1; - - -SELECT 'no user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is deduplicated bacause deduplication works in scope of one partiotion:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; - - -SELECT 'with user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is not deduplicated because different tokens:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is not deduplicated because different tokens:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; - - -SELECT 'with incorrect ussage of user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is deduplicated because equal tokens:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is deduplicated because equal tokens:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference deleted file mode 100644 index bf900aa84d2..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh deleted file mode 100755 index 49eb52b47fd..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference deleted file mode 100644 index c815324b455..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh deleted file mode 100755 index 53af06d4a6f..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference deleted file mode 100644 index 6e76ec46aa8..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh deleted file mode 100755 index 7d4f5240cd1..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference deleted file mode 100644 index a25e8713c61..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh deleted file mode 100755 index 109d1674f3a..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference deleted file mode 100644 index b6a3e0175a7..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference +++ /dev/null @@ -1,706 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh deleted file mode 100755 index fe3d610a758..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference deleted file mode 100644 index 1921103f49e..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference +++ /dev/null @@ -1,706 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh deleted file mode 100755 index 9adee6d53d4..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh index 09bdd7f6b56..cba5317fcfa 100755 --- a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" prev_max_size=$($CLICKHOUSE_CLIENT --query "SELECT max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'") $CLICKHOUSE_CLIENT --query "SELECT current_size > 0 FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" -config_path=/etc/clickhouse-server/config.d/storage_conf.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf.xml new_max_size=$($CLICKHOUSE_CLIENT --query "SELECT divide(max_size, 2) FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'") sed -i "s|$prev_max_size<\/max_size>|$new_max_size<\/max_size>|" $config_path diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh index cedb651a430..1e6bfb414d8 100755 --- a/tests/queries/0_stateless/03035_max_insert_threads_support.sh +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -8,7 +8,7 @@ DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" $CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC -" | grep -o StorageFileSink | wc -l +" | grep -o MaterializingTransform | wc -l DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") rm $DATA_FILE_PATH diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh index b82ddb3813e..5d8eac082cf 100755 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" function test() diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference index ca98ec0963c..a9c785d1e48 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -2,525 +2,525 @@ Memory initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 4 UInt64 7 String 8 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 5 UInt64 8 String 9 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 5 UInt64 8 String 9 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 5 UInt64 8 String 12 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 MergeTree compact initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 1 UInt64 9 None 12 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 1 UInt64 9 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 1 UInt64 12 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 MergeTree wide initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 1 UInt64 9 None 12 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 1 UInt64 9 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 1 UInt64 12 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference index 18a181464e9..f7c00bd8c44 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference @@ -2,181 +2,181 @@ MergeTree compact initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter rename column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert nested dynamic 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] alter rename column 2 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] MergeTree wide initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter rename column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert nested dynamic 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] alter rename column 2 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference index b1ea186a917..0dab4ea0d20 100644 --- a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference @@ -2,55 +2,55 @@ MergeTree compact initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 check table 1 MergeTree wide initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 check table 1 diff --git a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference index 3ac6127fb21..2d33f7f6683 100644 --- a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference +++ b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference @@ -1,8 +1,8 @@ -100000000 140000000 -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 +100000000 100000000 0 0 -100000000 140000000 +100000000 100000000 0 0 0 0 1 diff --git a/tests/queries/0_stateless/03128_system_unload_primary_key.reference b/tests/queries/0_stateless/03128_system_unload_primary_key.reference index c7b40ae5b06..2646dc7247f 100644 --- a/tests/queries/0_stateless/03128_system_unload_primary_key.reference +++ b/tests/queries/0_stateless/03128_system_unload_primary_key.reference @@ -1,4 +1,4 @@ -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 0 0 0 0 diff --git a/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh index ea7bb8f7ad0..c27dfffcfc2 100755 --- a/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh +++ b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh @@ -85,7 +85,7 @@ export -f optimize_thread; export -f insert_thread; -TIMEOUT=30 +TIMEOUT=20 # Sometimes we detach and attach tables timeout $TIMEOUT bash -c alter_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/03153_format_regexp_usability.sh b/tests/queries/0_stateless/03153_format_regexp_usability.sh index 03bed10dd17..561de3be893 100755 --- a/tests/queries/0_stateless/03153_format_regexp_usability.sh +++ b/tests/queries/0_stateless/03153_format_regexp_usability.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-ordinary-database, long +# Tags: no-fasttest, no-ordinary-database, long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.reference b/tests/queries/0_stateless/03167_base64_url_functions_sh.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh new file mode 100755 index 00000000000..57060b8c525 --- /dev/null +++ b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# shellcheck disable=SC2155 + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +urls=( + "http://www.example.com" + "https://secure.example.com" + "http://example.com" + "https://www.example.org" + "https://subdomain.example.com" + "http://sub.sub.example.com" + "http://192.168.1.1" + "https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]" + "http://example.com:8080" + "https://example.com:443" + "http://example.com/path/to/page.html" + "https://example.com/path/with/trailing/slash/" + "http://example.com/search?q=query&lang=en" + "https://example.com/path?param1=value1¶m2=value2" + "http://example.com/page.html#section1" + "https://example.com/document.pdf#page=10" + "http://user:password@example.com" + "https://user@example.com" + "https://user:pass@sub.example.com:8080/path/page.html?query=123#fragment" + "http://example.com/path%20with%20spaces" + "https://example.com/search?q=encode+this" + "http://例子.测试" + "https://mañana.com" + "http://example.com/%E2%82%AC" + "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" + "file:///C:/path/to/file.txt" + "file:///home/user/document.pdf" + "ftp://ftp.example.com/pub/file.zip" + "ftps://secure-ftp.example.com/private/doc.pdf" + "mailto:user@example.com" + "mailto:user@example.com?subject=Hello&body=How%20are%20you" + "git://github.com/user/repo.git" + "ssh://user@host.xz:port/path/to/repo.git" + "https://example.com/path(1)/[2]/{3}" + "http://example.com/path;param?query,value" + "" + "http://" + "example.com" + "http:" + "//" + "?query=value" + "#fragment" + "http://?#" + "http://xn--bcher-kva.ch" + "https://xn--bcher-kva.xn--tckwe/xn--8ws00zhy3a/%E6%B8%AC%E8%A9%A6.php?xn--o39an51a5phao35a=xn--mgbh0fb&xn--fiq228c5hs=test" + "https://xn--3e0b707e.xn--79-8kcre8v3a/%ED%85%8C%EC%8A%A4%ED%8A%B8/%ED%8C%8C%EC%9D%BC.jsp?xn--i1b6b1a6a2e=xn--9t4b11yi5a&xn--3e0b707e=xn--80aaa1cbgbm" + "https://example.com/path?param=value&special=!@#$%^&*()" + + "http://example.com/path/with/~tilde" + "https://example.com/path/with/\`backtick\`" + + "https://example.com/path?param1=value1¶m2=value2¶m3=value3#section1#section2" + "http://example.com/page?q1=v1&q2=v2#frag1#frag2#frag3" + + "https://example.com/☃/snowman" + "http://example.com/path/⽇本語" + "https://example.com/ü/ñ/path?q=ç" + + "https://example.com/path/to/very/long/url/that/exceeds/two/hundred/and/fifty/five/characters/lorem/ipsum/dolor/sit/amet/consectetur/adipiscing/elit/sed/do/eiusmod/tempor/incididunt/ut/labore/et/dolore/magna/aliqua/ut/enim/ad/minim/veniam/quis/nostrud/exercitation/ullamco/laboris/nisi/ut/aliquip/ex/ea/commodo/consequat" + + "https://example.com//path///to//file" + "http://example.com/path?param1=value1&¶m2=value2&&¶m3=value3" + + "http://example.com/%70%61%74%68?%70%61%72%61%6d=%76%61%6c%75%65#%66%72%61%67%6d%65%6e%74" + + "HtTpS://ExAmPlE.cOm/PaTh" + "http://EXAMPLE.COM/PATH" + + "http://127.0.0.1:8080/path" + "https://[::1]/path" + "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080/path" + + "http://example.com:65535/path" + "https://example.com:0/path" + + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQABDQottAAAAABJRU5ErkJggg==" + + "https://user:password@example.com:8080/path?query=value#fragment" + "ftp://anonymous:password@ftp.example.com/pub/" + + "http://example.com/path%20with%20spaces" + "https://example.com/search?q=query%20with%20spaces" + + "https://www.mañana.com/path" + "http://例子.测试/path" + "https://рм.рф/path" + + "https://user:pass@sub.example.com:8080/p/a/t/h?query=123&key=value#fragid1" + + "jdbc:mysql://localhost:3306/database" + "market://details?id=com.example.app" + "tel:+1-816-555-1212" + "sms:+18165551212" + + "http://[1080:0:0:0:8:800:200C:417A]/index.html" + "https://[2001:db8::1428:57ab]:8080/path" + + "http://.." + "http://../" + "http://??" + "http://??/" + "http:///a" + "http://example.com??" + "http://example.com??/" + "foo://example.com:8042/over/there?name=ferret#nose" + "//example.com/path" +) + + +base64URLEncode() { + echo -n "$1" | base64 -w0 | tr '+/' '-_' | tr -d '=' +} + +base64URLDecode() { + local len=$((${#1} % 4)) + local result="$1" + if [ $len -eq 2 ]; then result="$1"'==' + elif [ $len -eq 3 ]; then result="$1"'=' + fi + echo "$result" | tr '_-' '/+' | base64 -w0 -d +} + +test() { + local input="$1" + local encode_ch=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('$input')") + local encode_gold=$(base64URLEncode $input) + + local decode_ch=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode_gold')") + local decode_gold=$(base64URLDecode $encode_gold) + + if [ "$encode_ch" != "$encode_gold" ]; then + echo "Input: $input" + echo "Expected: $encode_gold" + echo "Got: $encode_ch" + fi + + if [ "$decode_ch" != "$input" ] || [ "$decode_ch" != "$decode_gold" ]; then + echo "Input: $input" + echo "Decode gold: $decode_gold" + echo "Got: $decode_ch" + fi +} + + +for url in "${urls[@]}"; do + test "$url" +done + +# special case for ' +decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode(base64URLEncode('http://example.com/!$&\'()*+,;=:@/path'))") +if [ "$decode" != "http://example.com/!$&\'()*+,;=:@/path" ]; then + echo "Special case fail" + echo "Got: $decode" +fi diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference b/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference new file mode 100644 index 00000000000..306885a0974 --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference @@ -0,0 +1,6 @@ +1 +0 +1 +0 +0 +0 diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql new file mode 100644 index 00000000000..75025dcadc8 --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS t_read_in_order_1; + +CREATE TABLE t_read_in_order_1 (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS index_granularity = 1024, index_granularity_bytes = '10M'; + +INSERT INTO t_read_in_order_1 SELECT number, number FROM numbers(1000000); + +SET max_threads = 8; +SET optimize_read_in_order = 1; +SET read_in_order_use_buffering = 1; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SET read_in_order_use_buffering = 0; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +DROP TABLE t_read_in_order_1; diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.reference b/tests/queries/0_stateless/03168_read_in_order_buffering_2.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql new file mode 100644 index 00000000000..1d3a75412e0 --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql @@ -0,0 +1,17 @@ +-- Tags: long, no-random-settings, no-tsan, no-asan, no-msan, no-s3-storage + +DROP TABLE IF EXISTS t_read_in_order_2; + +CREATE TABLE t_read_in_order_2 (id UInt64, v UInt64) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_read_in_order_2 SELECT number, number FROM numbers(10000000); +OPTIMIZE TABLE t_read_in_order_2 FINAL; + +SET optimize_read_in_order = 1; +SET max_threads = 4; +SET read_in_order_use_buffering = 1; +SET max_memory_usage = '100M'; + +SELECT * FROM t_read_in_order_2 ORDER BY id FORMAT Null; + +DROP TABLE t_read_in_order_2; diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference b/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference new file mode 100644 index 00000000000..f5668ed935b --- /dev/null +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference @@ -0,0 +1,50 @@ +\N None +42 UInt8 +-42 Int8 +42 UInt16 +-42 Int16 +42 UInt32 +-42 Int32 +42 UInt64 +-42 Int64 +42 UInt128 +-42 Int128 +42 UInt256 +-42 Int256 +42.42 Float32 +42.42 Float64 +2020-01-01 Date +2020-01-01 Date32 +2020-01-01 00:00:00 DateTime(\'EST\') +2020-01-01 00:00:00 DateTime(\'CET\') +2020-01-01 00:00:00.000000 DateTime64(6, \'EST\') +2020-01-01 00:00:00.000000 DateTime64(6, \'CET\') +Hello, World! String +aaaaa FixedString(5) +a Enum8(\'c\' = -128, \'a\' = 1, \'b\' = 2) +a Enum16(\'c\' = -1280, \'a\' = 1, \'b\' = 2) +42.42 Decimal(9, 3) +42.42 Decimal(18, 3) +42.42 Decimal(38, 3) +42.42 Decimal(76, 3) +984ac60f-4d08-4ef1-9c62-d82f343fbc90 UUID +[1,2,3] Array(UInt64) +[[[1],[2]],[[3,4,5]]] Array(Array(Array(UInt64))) +(1,'str',42.42) Tuple(UInt32, String, Float32) +(1,'str',42.42) Tuple(a UInt32, b String, c Float32) +(1,('str',(42.42,-30))) Tuple(UInt32, Tuple(String, Tuple(Float32, Int8))) +(1,('str',(42.42,-30))) Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8))) +\0 \0\0\0\0\0\0\0\0\0\0\0\0\06364136223846793005 0 123459*\0\0\0\0\0\0\0 AggregateFunction(quantile(0.5), UInt64) +42 SimpleAggregateFunction(sum, UInt64) +Hello, World! LowCardinality(String) +{1:'str1',2:'str2'} Map(UInt64, String) +{1:{1:{1:'str1'}},2:{2:{2:'str2'}}} Map(UInt64, Map(UInt64, Map(UInt64, String))) +127.0.0.0 IPv4 +2001:db8:cafe:1::1 IPv6 +true Bool +[(1,2),(3,4)] Nested(a UInt32, b UInt32) +[(0,0),(10,0),(10,10),(0,10)] Ring +(0,0) Point +[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] Polygon +[[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] MultiPolygon +[{42:(1,[(2,{1:2})])}] Array(Map(UInt8, Tuple(UInt8, Array(Tuple(UInt8, Map(UInt8, UInt8)))))) diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh new file mode 100755 index 00000000000..9b57e5c8718 --- /dev/null +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "create table test (id UInt64, d Dynamic(max_types=255)) engine=Memory" + +$CLICKHOUSE_CLIENT -q "insert into test select 0, NULL" +$CLICKHOUSE_CLIENT -q "insert into test select 1, materialize(42)::UInt8" +$CLICKHOUSE_CLIENT -q "insert into test select 2, materialize(-42)::Int8" +$CLICKHOUSE_CLIENT -q "insert into test select 3, materialize(42)::UInt16" +$CLICKHOUSE_CLIENT -q "insert into test select 4, materialize(-42)::Int16" +$CLICKHOUSE_CLIENT -q "insert into test select 5, materialize(42)::UInt32" +$CLICKHOUSE_CLIENT -q "insert into test select 6, materialize(-42)::Int32" +$CLICKHOUSE_CLIENT -q "insert into test select 7, materialize(42)::UInt64" +$CLICKHOUSE_CLIENT -q "insert into test select 8, materialize(-42)::Int64" +$CLICKHOUSE_CLIENT -q "insert into test select 9, materialize(42)::UInt128" +$CLICKHOUSE_CLIENT -q "insert into test select 10, materialize(-42)::Int128" +$CLICKHOUSE_CLIENT -q "insert into test select 11, materialize(42)::UInt256" +$CLICKHOUSE_CLIENT -q "insert into test select 12, materialize(-42)::Int256" +$CLICKHOUSE_CLIENT -q "insert into test select 13, materialize(42.42)::Float32" +$CLICKHOUSE_CLIENT -q "insert into test select 14, materialize(42.42)::Float64" +$CLICKHOUSE_CLIENT -q "insert into test select 15, materialize('2020-01-01')::Date" +$CLICKHOUSE_CLIENT -q "insert into test select 16, materialize('2020-01-01')::Date32" +$CLICKHOUSE_CLIENT -q "insert into test select 17, materialize('2020-01-01 00:00:00')::DateTime('EST')" +$CLICKHOUSE_CLIENT -q "insert into test select 18, materialize('2020-01-01 00:00:00')::DateTime('CET')" +$CLICKHOUSE_CLIENT -q "insert into test select 19, materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'EST')" +$CLICKHOUSE_CLIENT -q "insert into test select 20, materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'CET')" +$CLICKHOUSE_CLIENT -q "insert into test select 21, materialize('Hello, World!')" +$CLICKHOUSE_CLIENT -q "insert into test select 22, materialize('aaaaa')::FixedString(5)" +$CLICKHOUSE_CLIENT -q "insert into test select 23, materialize('a')::Enum8('a' = 1, 'b' = 2, 'c' = -128)" +$CLICKHOUSE_CLIENT -q "insert into test select 24, materialize('a')::Enum16('a' = 1, 'b' = 2, 'c' = -1280)" +$CLICKHOUSE_CLIENT -q "insert into test select 25, materialize(42.42)::Decimal32(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 26, materialize(42.42)::Decimal64(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 27, materialize(42.42)::Decimal128(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 28, materialize(42.42)::Decimal256(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 29, materialize('984ac60f-4d08-4ef1-9c62-d82f343fbc90')::UUID" +$CLICKHOUSE_CLIENT -q "insert into test select 30, materialize([1, 2, 3])::Array(UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 31, materialize([[[1], [2]], [[3, 4, 5]]])::Array(Array(Array(UInt64)))" +$CLICKHOUSE_CLIENT -q "insert into test select 32, materialize(tuple(1, 'str', 42.42))::Tuple(UInt32, String, Float32)" +$CLICKHOUSE_CLIENT -q "insert into test select 33, materialize(tuple(1, 'str', 42.42))::Tuple(a UInt32, b String, c Float32)" +$CLICKHOUSE_CLIENT -q "insert into test select 34, materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(UInt32, Tuple(String, Tuple(Float32, Int8)))" +$CLICKHOUSE_CLIENT -q "insert into test select 35, materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8)))" +$CLICKHOUSE_CLIENT -q "insert into test select 36, quantileState(0.5)(42::UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 37, sumSimpleState(42::UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 38, toLowCardinality('Hello, World!')" +$CLICKHOUSE_CLIENT -q "insert into test select 39, materialize(map(1, 'str1', 2, 'str2'))::Map(UInt64, String)" +$CLICKHOUSE_CLIENT -q "insert into test select 40, materialize(map(1, map(1, map(1, 'str1')), 2, map(2, map(2, 'str2'))))::Map(UInt64, Map(UInt64, Map(UInt64, String)))" +$CLICKHOUSE_CLIENT -q "insert into test select 41, materialize('127.0.0.0')::IPv4" +$CLICKHOUSE_CLIENT -q "insert into test select 42, materialize('2001:db8:cafe:1:0:0:0:1')::IPv6" +$CLICKHOUSE_CLIENT -q "insert into test select 43, materialize(true)::Bool" +$CLICKHOUSE_CLIENT -q "insert into test select 44, materialize([tuple(1, 2), tuple(3, 4)])::Nested(a UInt32, b UInt32)" +$CLICKHOUSE_CLIENT -q "insert into test select 45, materialize([(0, 0), (10, 0), (10, 10), (0, 10)])::Ring" +$CLICKHOUSE_CLIENT -q "insert into test select 46, materialize((0, 0))::Point" +$CLICKHOUSE_CLIENT -q "insert into test select 47, materialize([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]])::Polygon" +$CLICKHOUSE_CLIENT -q "insert into test select 48, materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])::MultiPolygon" +$CLICKHOUSE_CLIENT -q "insert into test select 49, materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" + +$CLICKHOUSE_CLIENT -q "select * from test format RowBinary" | $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --input-format RowBinary --structure 'id UInt64, d Dynamic(max_types=255)' -q "select d, dynamicType(d) from table order by id" +$CLICKHOUSE_CLIENT -q "drop table test" + diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference new file mode 100644 index 00000000000..93018551e1b --- /dev/null +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference @@ -0,0 +1,10 @@ +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +--------------------------- +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +--------------------------- +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +--------------------------- +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +a1451105-722e-4fe7-bfaa-65ad2ae249c2 diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh new file mode 100755 index 00000000000..20a29e2734e --- /dev/null +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -nq " + CREATE TABLE ids (id UUID, whatever String) Engine=MergeTree ORDER BY tuple(); + INSERT INTO ids VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', 'whatever'); + + CREATE TABLE data (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); + INSERT INTO data VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-01', 'CREATED'); + + CREATE TABLE data2 (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); + INSERT INTO data2 VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-02', 'CREATED'); +" + +$CLICKHOUSE_CLIENT -nq " +SET allow_experimental_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 10, allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; + +SELECT + id, + whatever +FROM ids AS l +INNER JOIN view( + SELECT * + FROM merge($CLICKHOUSE_DATABASE, 'data.*') +) AS s ON l.id = s.id +WHERE status IN ['CREATED', 'CREATING'] +ORDER BY event_time DESC; + +SELECT '---------------------------'; + +with +results1 as ( + SELECT id + FROM data t1 + inner join ids t2 + on t1.id = t2.id +), +results2 as ( + SELECT id + FROM ids t1 + inner join data t2 + on t1.id = t2.id +) +select * from results1 union all select * from results2; + +SELECT '---------------------------'; + +with +results1 as ( + SELECT id + FROM data t1 + inner join ids t2 + on t1.id = t2.id +), +results2 as ( + SELECT id + FROM ids t1 + inner join data t2 + on t1.id = t2.id +) +select * from results1 t1 inner join results2 t2 using (id); + +SELECT '---------------------------'; + +with +results1 as ( + SELECT t1.id + FROM data t1 + inner join ids t2 on t1.id = t2.id + left join data t3 on t2.id = t3.id +), +results2 as ( + SELECT id + FROM ids t1 + inner join data t2 + on t1.id = t2.id +) +select * from results1 union all select * from results2; +" diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference new file mode 100644 index 00000000000..1ba147f9627 --- /dev/null +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference @@ -0,0 +1,114 @@ +42 UInt8 +42 UInt8 +\N Nullable(Nothing) +\N Nullable(Nothing) +42 UInt8 +42 UInt8 +-42 Int8 +-42 Int8 +42 UInt16 +42 UInt16 +-42 Int16 +-42 Int16 +42 UInt32 +42 UInt32 +-42 Int32 +-42 Int32 +42 UInt64 +42 UInt64 +-42 Int64 +-42 Int64 +42 UInt128 +42 UInt128 +-42 Int128 +-42 Int128 +42 UInt256 +42 UInt256 +-42 Int256 +-42 Int256 +42.42 Float32 +42.42 Float32 +42.42 Float64 +42.42 Float64 +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date32 +2020-01-01 Date32 +2020-01-01 00:00:00 DateTime +2020-01-01 00:00:00 DateTime +2020-01-01 00:00:00 DateTime(\'EST\') +2020-01-01 00:00:00 DateTime(\'EST\') +2020-01-01 00:00:00 DateTime(\'CET\') +2020-01-01 00:00:00 DateTime(\'CET\') +2020-01-01 00:00:00.000000 DateTime64(6) +2020-01-01 00:00:00.000000 DateTime64(6) +2020-01-01 00:00:00.000000 DateTime64(6, \'EST\') +2020-01-01 00:00:00.000000 DateTime64(6, \'EST\') +2020-01-01 00:00:00.000000 DateTime64(6, \'CET\') +2020-01-01 00:00:00.000000 DateTime64(6, \'CET\') +Hello, World! String +Hello, World! String +aaaaa FixedString(5) +aaaaa FixedString(5) +a Enum8(\'c\' = -128, \'a\' = 1, \'b\' = 2) +a Enum8(\'c\' = -128, \'a\' = 1, \'b\' = 2) +a Enum16(\'c\' = -1280, \'a\' = 1, \'b\' = 2) +a Enum16(\'c\' = -1280, \'a\' = 1, \'b\' = 2) +42.42 Decimal(9, 3) +42.42 Decimal(9, 3) +42.42 Decimal(18, 3) +42.42 Decimal(18, 3) +42.42 Decimal(38, 3) +42.42 Decimal(38, 3) +42.42 Decimal(76, 3) +42.42 Decimal(76, 3) +984ac60f-4d08-4ef1-9c62-d82f343fbc90 UUID +984ac60f-4d08-4ef1-9c62-d82f343fbc90 UUID +[1,2,3] Array(UInt64) +[1,2,3] Array(UInt64) +[[[1],[2]],[[3,4,5]]] Array(Array(Array(UInt64))) +[[[1],[2]],[[3,4,5]]] Array(Array(Array(UInt64))) +(1,'str',42.42) Tuple(UInt32, String, Float32) +(1,'str',42.42) Tuple(UInt32, String, Float32) +(1,'str',42.42) Tuple(\n a UInt32,\n b String,\n c Float32) +(1,'str',42.42) Tuple(\n a UInt32,\n b String,\n c Float32) +(1,('str',(42.42,-30))) Tuple(UInt32, Tuple(String, Tuple(Float32, Int8))) +(1,('str',(42.42,-30))) Tuple(UInt32, Tuple(String, Tuple(Float32, Int8))) +(1,('str',(42.42,-30))) Tuple(\n a UInt32,\n b Tuple(\n c String,\n d Tuple(\n e Float32,\n f Int8))) +(1,('str',(42.42,-30))) Tuple(\n a UInt32,\n b Tuple(\n c String,\n d Tuple(\n e Float32,\n f Int8))) +\0 \0\0\0\0\0\0\0\0\0\0\0\0\06364136223846793005 0 123459*\0\0\0\0\0\0\0 AggregateFunction(quantile(0.5), UInt64) +\0 \0\0\0\0\0\0\0\0\0\0\0\0\06364136223846793005 0 123459*\0\0\0\0\0\0\0 AggregateFunction(quantile(0.5), UInt64) +42 SimpleAggregateFunction(sum, UInt64) +42 SimpleAggregateFunction(sum, UInt64) +Hello, World! LowCardinality(String) +Hello, World! LowCardinality(String) +{1:'str1',2:'str2'} Map(UInt64, String) +{1:'str1',2:'str2'} Map(UInt64, String) +{1:{1:{1:'str1'}},2:{2:{2:'str2'}}} Map(UInt64, Map(UInt64, Map(UInt64, String))) +{1:{1:{1:'str1'}},2:{2:{2:'str2'}}} Map(UInt64, Map(UInt64, Map(UInt64, String))) +127.0.0.0 IPv4 +127.0.0.0 IPv4 +2001:db8:cafe:1::1 IPv6 +2001:db8:cafe:1::1 IPv6 +true Bool +true Bool +[(1,2),(3,4)] Nested(a UInt32, b UInt32) +[(1,2),(3,4)] Nested(a UInt32, b UInt32) +[(0,0),(10,0),(10,10),(0,10)] Ring +[(0,0),(10,0),(10,10),(0,10)] Ring +(0,0) Point +(0,0) Point +[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] Polygon +[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] Polygon +[[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] MultiPolygon +[[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] MultiPolygon +[{42:(1,[(2,{1:2})])}] Array(Map(UInt8, Tuple(UInt8, Array(Tuple(UInt8, Map(UInt8, UInt8)))))) +[{42:(1,[(2,{1:2})])}] Array(Map(UInt8, Tuple(UInt8, Array(Tuple(UInt8, Map(UInt8, UInt8)))))) +42 Variant(String, Tuple(\n a UInt32,\n b Array(Map(String, String))), UInt32) +42 Variant(String, Tuple(\n a UInt32,\n b Array(Map(String, String))), UInt32) +[{42:(1,[(2,{1:2})])}] Dynamic +[{42:(1,[(2,{1:2})])}] Dynamic +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=10) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=10) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=255) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=255) diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh new file mode 100755 index 00000000000..723b11ad620 --- /dev/null +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function test +{ + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" +} + +test "materialize(42)::UInt8" +test "NULL" +test "materialize(42)::UInt8" +test "materialize(-42)::Int8" +test "materialize(42)::UInt16" +test "materialize(-42)::Int16" +test "materialize(42)::UInt32" +test "materialize(-42)::Int32" +test "materialize(42)::UInt64" +test "materialize(-42)::Int64" +test "materialize(42)::UInt128" +test "materialize(-42)::Int128" +test "materialize(42)::UInt256" +test "materialize(-42)::Int256" +test "materialize(42.42)::Float32" +test "materialize(42.42)::Float64" +test "materialize('2020-01-01')::Date" +test "materialize('2020-01-01')::Date32" +test "materialize('2020-01-01 00:00:00')::DateTime" +test "materialize('2020-01-01 00:00:00')::DateTime('EST')" +test "materialize('2020-01-01 00:00:00')::DateTime('CET')" +test "materialize('2020-01-01 00:00:00.000000')::DateTime64(6)" +test "materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'EST')" +test "materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'CET')" +test "materialize('Hello, World!')" +test "materialize('aaaaa')::FixedString(5)" +test "materialize('a')::Enum8('a' = 1, 'b' = 2, 'c' = -128)" +test "materialize('a')::Enum16('a' = 1, 'b' = 2, 'c' = -1280)" +test "materialize(42.42)::Decimal32(3)" +test "materialize(42.42)::Decimal64(3)" +test "materialize(42.42)::Decimal128(3)" +test "materialize(42.42)::Decimal256(3)" +test "materialize('984ac60f-4d08-4ef1-9c62-d82f343fbc90')::UUID" +test "materialize([1, 2, 3])::Array(UInt64)" +test "materialize([[[1], [2]], [[3, 4, 5]]])::Array(Array(Array(UInt64)))" +test "materialize(tuple(1, 'str', 42.42))::Tuple(UInt32, String, Float32)" +test "materialize(tuple(1, 'str', 42.42))::Tuple(a UInt32, b String, c Float32)" +test "materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(UInt32, Tuple(String, Tuple(Float32, Int8)))" +test "materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8)))" +test "quantileState(0.5)(42::UInt64)" +test "sumSimpleState(42::UInt64)" +test "toLowCardinality('Hello, World!')" +test "materialize(map(1, 'str1', 2, 'str2'))::Map(UInt64, String)" +test "materialize(map(1, map(1, map(1, 'str1')), 2, map(2, map(2, 'str2'))))::Map(UInt64, Map(UInt64, Map(UInt64, String)))" +test "materialize('127.0.0.0')::IPv4" +test "materialize('2001:db8:cafe:1:0:0:0:1')::IPv6" +test "materialize(true)::Bool" +test "materialize([tuple(1, 2), tuple(3, 4)])::Nested(a UInt32, b UInt32)" +test "materialize([(0, 0), (10, 0), (10, 10), (0, 10)])::Ring" +test "materialize((0, 0))::Point" +test "materialize([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]])::Polygon" +test "materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])::MultiPolygon" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" +test "materialize(42::UInt32)::Variant(UInt32, String, Tuple(a UInt32, b Array(Map(String, String))))" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=10)" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=255)" diff --git a/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference new file mode 100644 index 00000000000..3a6727b70e8 --- /dev/null +++ b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference @@ -0,0 +1,50 @@ +-- Single partition by function +0 +2 +-- Nested partition by function +1 +2 +1 +1 +-- Nested partition by function, LowCardinality +1 +2 +1 +1 +-- Nested partition by function, Nullable +1 +2 +1 +1 +-- Nested partition by function, LowCardinality + Nullable +1 +2 +1 +1 +-- Non-safe cast +2 +2 +-- Multiple partition columns +1 +1 +1 +2 +-- LowCardinality set +1 +1 +-- Nullable set +1 +1 +-- LowCardinality + Nullable set +1 +1 +-- Not failing with date parsing functions +1 +0 +-- Pruning + not failing with nested date parsing functions +1 +2 +0 +-- Empty transform functions +2 +1 diff --git a/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql new file mode 100644 index 00000000000..8ffabacaa8c --- /dev/null +++ b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql @@ -0,0 +1,258 @@ +SELECT '-- Single partition by function'; + +DROP TABLE IF EXISTS 03173_single_function; +CREATE TABLE 03173_single_function ( + dt Date, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY toMonth(dt); + +INSERT INTO 03173_single_function +SELECT toDate('2000-01-01') + 10 * number FROM numbers(50) +UNION ALL +SELECT toDate('2100-01-01') + 10 * number FROM numbers(50); +OPTIMIZE TABLE 03173_single_function FINAL; + +SELECT count() FROM 03173_single_function WHERE dt IN ('2024-01-20', '2024-05-25') SETTINGS log_comment='03173_single_function'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_single_function'; + +DROP TABLE IF EXISTS 03173_single_function; + +SELECT '-- Nested partition by function'; + +DROP TABLE IF EXISTS 03173_nested_function; +CREATE TABLE 03173_nested_function( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_nested_function SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function FINAL; + +SELECT count() FROM 03173_nested_function WHERE id IN (10) SETTINGS log_comment='03173_nested_function'; +SELECT count() FROM 03173_nested_function WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr'; + +DROP TABLE IF EXISTS 03173_nested_function; + +SELECT '-- Nested partition by function, LowCardinality'; + +SET allow_suspicious_low_cardinality_types = 1; + +DROP TABLE IF EXISTS 03173_nested_function_lc; +CREATE TABLE 03173_nested_function_lc( + id LowCardinality(Int32), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_nested_function_lc SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_lc FINAL; + +SELECT count() FROM 03173_nested_function_lc WHERE id IN (10) SETTINGS log_comment='03173_nested_function_lc'; +SELECT count() FROM 03173_nested_function_lc WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_lc'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_lc'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_lc'; + +DROP TABLE IF EXISTS 03173_nested_function_lc; + +SELECT '-- Nested partition by function, Nullable'; + +DROP TABLE IF EXISTS 03173_nested_function_null; +CREATE TABLE 03173_nested_function_null( + id Nullable(Int32), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3 +SETTINGS allow_nullable_key=1; + +INSERT INTO 03173_nested_function_null SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_null FINAL; + +SELECT count() FROM 03173_nested_function_null WHERE id IN (10) SETTINGS log_comment='03173_nested_function_null'; +SELECT count() FROM 03173_nested_function_null WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_null'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_null'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_null'; + +DROP TABLE IF EXISTS 03173_nested_function_null; + +SELECT '-- Nested partition by function, LowCardinality + Nullable'; + +DROP TABLE IF EXISTS 03173_nested_function_lc_null; + +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE 03173_nested_function_lc_null( + id LowCardinality(Nullable(Int32)), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3 +SETTINGS allow_nullable_key=1; + +INSERT INTO 03173_nested_function_lc_null SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_lc_null FINAL; + +SELECT count() FROM 03173_nested_function_lc_null WHERE id IN (10) SETTINGS log_comment='03173_nested_function_lc_null'; +SELECT count() FROM 03173_nested_function_lc_null WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_lc_null'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_lc_null'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_lc_null'; + +DROP TABLE IF EXISTS 03173_nested_function_lc_null; + +SELECT '-- Non-safe cast'; + +DROP TABLE IF EXISTS 03173_nonsafe_cast; +CREATE TABLE 03173_nonsafe_cast( + id Int64, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_nonsafe_cast SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nonsafe_cast FINAL; + +SELECT count() FROM 03173_nonsafe_cast WHERE id IN (SELECT '50' UNION ALL SELECT '99') SETTINGS log_comment='03173_nonsafe_cast'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nonsafe_cast'; + +DROP TABLE IF EXISTS 03173_nonsafe_cast; + +SELECT '-- Multiple partition columns'; + +DROP TABLE IF EXISTS 03173_multiple_partition_cols; +CREATE TABLE 03173_multiple_partition_cols ( + key1 Int32, + key2 Int32 +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY (intDiv(key1, 50), xxHash32(key2) % 3); + +INSERT INTO 03173_multiple_partition_cols SELECT number, number FROM numbers(100); +OPTIMIZE TABLE 03173_multiple_partition_cols FINAL; + +SELECT count() FROM 03173_multiple_partition_cols WHERE key2 IN (4) SETTINGS log_comment='03173_multiple_columns'; +SELECT count() FROM 03173_multiple_partition_cols WHERE xxHash32(key2) IN (4251411170) SETTINGS log_comment='03173_multiple_columns_subexpr'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_multiple_columns'; +-- Due to xxHash32() in WHERE condition, MinMax is unable to eliminate any parts, +-- so partition pruning leave two parts (for key1 // 50 = 0 and key1 // 50 = 1) +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_multiple_columns_subexpr'; + +-- Preparing base table for filtering by LowCardinality/Nullable sets +DROP TABLE IF EXISTS 03173_base_data_source; +CREATE TABLE 03173_base_data_source( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_base_data_source SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_base_data_source FINAL; + +SELECT '-- LowCardinality set'; + +SET allow_suspicious_low_cardinality_types = 1; +DROP TABLE IF EXISTS 03173_low_cardinality_set; +CREATE TABLE 03173_low_cardinality_set (id LowCardinality(Int32)) ENGINE=Memory AS SELECT 10; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_low_cardinality_set) SETTINGS log_comment='03173_low_cardinality_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_low_cardinality_set'; + +DROP TABLE IF EXISTS 03173_low_cardinality_set; + +SELECT '-- Nullable set'; + +DROP TABLE IF EXISTS 03173_nullable_set; +CREATE TABLE 03173_nullable_set (id Nullable(Int32)) ENGINE=Memory AS SELECT 10; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_nullable_set) SETTINGS log_comment='03173_nullable_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nullable_set'; + +DROP TABLE IF EXISTS 03173_nullable_set; + +SELECT '-- LowCardinality + Nullable set'; + +DROP TABLE IF EXISTS 03173_lc_nullable_set; +CREATE TABLE 03173_lc_nullable_set (id LowCardinality(Nullable(Int32))) ENGINE=Memory AS SELECT 10 UNION ALL SELECT NULL; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_lc_nullable_set) SETTINGS log_comment='03173_lc_nullable_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_lc_nullable_set'; + +DROP TABLE IF EXISTS 03173_lc_nullable_set; + +SELECT '-- Not failing with date parsing functions'; + +DROP TABLE IF EXISTS 03173_date_parsing; +CREATE TABLE 03173_date_parsing ( + id String +) +ENGINE=MergeTree +ORDER BY tuple() +PARTITION BY toDate(id); + +INSERT INTO 03173_date_parsing +SELECT toString(toDate('2023-04-01') + number) +FROM numbers(20); + +SELECT count() FROM 03173_date_parsing WHERE id IN ('2023-04-02', '2023-05-02'); +SELECT count() FROM 03173_date_parsing WHERE id IN ('not a date'); + +DROP TABLE IF EXISTS 03173_date_parsing; + +SELECT '-- Pruning + not failing with nested date parsing functions'; + +DROP TABLE IF EXISTS 03173_nested_date_parsing; +CREATE TABLE 03173_nested_date_parsing ( + id String +) +ENGINE=MergeTree +ORDER BY tuple() +PARTITION BY toMonth(toDate(id)); + +INSERT INTO 03173_nested_date_parsing +SELECT toString(toDate('2000-01-01') + 10 * number) FROM numbers(50) +UNION ALL +SELECT toString(toDate('2100-01-01') + 10 * number) FROM numbers(50); + +SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('2000-01-21', '2023-05-02') SETTINGS log_comment='03173_nested_date_parsing'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_date_parsing'; +SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('not a date'); + +DROP TABLE IF EXISTS 03173_nested_date_parsing; + +SELECT '-- Empty transform functions'; + +DROP TABLE IF EXISTS 03173_empty_transform; +CREATE TABLE 03173_empty_transform( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_empty_transform SELECT number FROM numbers(6); +OPTIMIZE TABLE 03173_empty_transform FINAL; + +SELECT id FROM 03173_empty_transform WHERE xxHash32(id) % 3 IN (xxHash32(2::Int32) % 3) SETTINGS log_comment='03173_empty_transform'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_empty_transform'; + +DROP TABLE IF EXISTS 03173_empty_transform; diff --git a/tests/queries/0_stateless/03174_projection_deduplicate.reference b/tests/queries/0_stateless/03174_projection_deduplicate.reference new file mode 100644 index 00000000000..1796b2f1dee --- /dev/null +++ b/tests/queries/0_stateless/03174_projection_deduplicate.reference @@ -0,0 +1,3 @@ +1 one +1 one +1 one diff --git a/tests/queries/0_stateless/03174_projection_deduplicate.sql b/tests/queries/0_stateless/03174_projection_deduplicate.sql new file mode 100644 index 00000000000..46222b69dc7 --- /dev/null +++ b/tests/queries/0_stateless/03174_projection_deduplicate.sql @@ -0,0 +1,30 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/65548 +DROP TABLE IF EXISTS test_projection_deduplicate; + +CREATE TABLE test_projection_deduplicate +( + `id` Int32, + `string` String, + PROJECTION test_projection + ( + SELECT id + GROUP BY id + ) +) +ENGINE = MergeTree +PRIMARY KEY id; + +INSERT INTO test_projection_deduplicate VALUES (1, 'one'); +INSERT INTO test_projection_deduplicate VALUES (1, 'one'); + +OPTIMIZE TABLE test_projection_deduplicate DEDUPLICATE; -- { serverError NOT_IMPLEMENTED } + +SELECT * FROM test_projection_deduplicate; + +ALTER TABLE test_projection_deduplicate DROP PROJECTION test_projection; + +OPTIMIZE TABLE test_projection_deduplicate DEDUPLICATE; + +SELECT * FROM test_projection_deduplicate; + +DROP TABLE test_projection_deduplicate; diff --git a/tests/queries/0_stateless/03198_json_extract_more_types.reference b/tests/queries/0_stateless/03198_json_extract_more_types.reference new file mode 100644 index 00000000000..9a6580ff81b --- /dev/null +++ b/tests/queries/0_stateless/03198_json_extract_more_types.reference @@ -0,0 +1,21 @@ +2020-01-01 +2020-01-01 +2020-01-01 00:00:00 +2020-01-01 00:00:00.000000 +127.0.0.1 +2001:db8:85a3::8a2e:370:7334 +42 +42 +42 +42 +42 +42 +42 +42 +42 +42 +Hello +Hello +\0\0\0 +Hello\0\0\0\0\0 +5801c962-1182-458a-89f8-d077da5074f9 diff --git a/tests/queries/0_stateless/03198_json_extract_more_types.sql b/tests/queries/0_stateless/03198_json_extract_more_types.sql new file mode 100644 index 00000000000..28d24bbb271 --- /dev/null +++ b/tests/queries/0_stateless/03198_json_extract_more_types.sql @@ -0,0 +1,29 @@ +set allow_suspicious_low_cardinality_types=1; + +select JSONExtract('{"a" : "2020-01-01"}', 'a', 'Date'); +select JSONExtract('{"a" : "2020-01-01"}', 'a', 'Date32'); +select JSONExtract('{"a" : "2020-01-01 00:00:00"}', 'a', 'DateTime'); +select JSONExtract('{"a" : "2020-01-01 00:00:00.000000"}', 'a', 'DateTime64(6)'); +select JSONExtract('{"a" : "127.0.0.1"}', 'a', 'IPv4'); +select JSONExtract('{"a" : "2001:0db8:85a3:0000:0000:8a2e:0370:7334"}', 'a', 'IPv6'); + + +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt8)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int8)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt16)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int16)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt64)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int64)'); + +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Float32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Float32)'); + +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(String)'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(5))'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(3))'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(10))'); + +select JSONExtract('{"a" : "5801c962-1182-458a-89f8-d077da5074f9"}', 'a', 'LowCardinality(UUID)'); + diff --git a/tests/queries/0_stateless/03198_orc_read_time_zone.reference b/tests/queries/0_stateless/03198_orc_read_time_zone.reference new file mode 100644 index 00000000000..809dba44400 --- /dev/null +++ b/tests/queries/0_stateless/03198_orc_read_time_zone.reference @@ -0,0 +1 @@ +1 2024-06-30 20:00:00.000 diff --git a/tests/queries/0_stateless/03198_orc_read_time_zone.sh b/tests/queries/0_stateless/03198_orc_read_time_zone.sh new file mode 100755 index 00000000000..27530c06237 --- /dev/null +++ b/tests/queries/0_stateless/03198_orc_read_time_zone.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test(id UInt64, t DateTime64) Engine=MergeTree order by id" +$CLICKHOUSE_CLIENT -q "insert into test from infile '$CURDIR/data_orc/test_reader_time_zone.snappy.orc' SETTINGS input_format_orc_read_use_writer_time_zone=true FORMAT ORC" +$CLICKHOUSE_CLIENT -q "select * from test SETTINGS session_timezone='Asia/Shanghai'" +$CLICKHOUSE_CLIENT -q "drop table test" \ No newline at end of file diff --git a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.reference b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.reference new file mode 100644 index 00000000000..aecacd10e00 --- /dev/null +++ b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.reference @@ -0,0 +1,28 @@ +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +a Nullable(Int64) +b Nullable(Int64) +c Nullable(Int64) +2 +a Nullable(Int64) +b Nullable(Int64) +c Nullable(Int64) +a Nullable(String) +b Nullable(Int64) +c Nullable(Int64) +2 +a Nullable(String) +b Nullable(Int64) +c Nullable(Int64) +a Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64)) +b Nullable(Int64) +c Nullable(Int64) +2 +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +a Nullable(Int64) +b Nullable(Int64) +c Nullable(Int64) +2 diff --git a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh new file mode 100755 index 00000000000..ce53f467823 --- /dev/null +++ b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo -e 'a,b,c\n1,2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=0; +SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%skip_first_lines%';" + +echo -e 'a,b,c\n"1",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=0; +SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_numbers_from_strings%';" + +echo -e 'a,b,c\n"(1,2,3)",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=0; +SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_strings_from_quoted_tuples%';" + +echo -e 'a\tb\tc\n1\t2\t3' > $CLICKHOUSE_TEST_UNIQUE_NAME.tsv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=0; +SELECT count() from system.schema_inference_cache where format = 'TSV' and additional_format_info like '%skip_first_lines%';" + + +rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv +rm $CLICKHOUSE_TEST_UNIQUE_NAME.tsv + diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.reference b/tests/queries/0_stateless/03198_table_function_directory_path.reference new file mode 100644 index 00000000000..19920de3d3c --- /dev/null +++ b/tests/queries/0_stateless/03198_table_function_directory_path.reference @@ -0,0 +1,3 @@ +2 +2 +1 diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.sql b/tests/queries/0_stateless/03198_table_function_directory_path.sql new file mode 100644 index 00000000000..9e2791847af --- /dev/null +++ b/tests/queries/0_stateless/03198_table_function_directory_path.sql @@ -0,0 +1,13 @@ +-- Tags: no-parallel + +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/1.csv', 'csv') SELECT '1.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/2.csv', 'csv') SELECT '2.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir/3.csv', 'csv') SELECT '3.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir1/dir/4.csv', 'csv') SELECT '4.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir2/dir/5.csv', 'csv') SELECT '5.csv' SETTINGS engine_file_truncate_on_insert=1; + +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path'); +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/'); +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/dir'); +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/*/dir', 'csv'); -- { serverError 74, 636 } +SELECT COUNT(*) FROM file('data_03198_table_function_directory_pat'); -- { serverError 400 } diff --git a/tests/queries/0_stateless/03199_fix_auc_tie_handling.reference b/tests/queries/0_stateless/03199_fix_auc_tie_handling.reference new file mode 100644 index 00000000000..f35b39d5972 --- /dev/null +++ b/tests/queries/0_stateless/03199_fix_auc_tie_handling.reference @@ -0,0 +1,3 @@ +nan +0.58333 +0.58333 diff --git a/tests/queries/0_stateless/03199_fix_auc_tie_handling.sql b/tests/queries/0_stateless/03199_fix_auc_tie_handling.sql new file mode 100644 index 00000000000..5de0844f445 --- /dev/null +++ b/tests/queries/0_stateless/03199_fix_auc_tie_handling.sql @@ -0,0 +1,32 @@ +CREATE TABLE labels_unordered +( + idx Int64, + score Float64, + label Int64 +) +ENGINE = MergeTree +PRIMARY KEY idx +ORDER BY idx; + +SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) +FROM labels_unordered; + +INSERT INTO labels_unordered (idx,score,label) VALUES (1,0.1,0), (2,0.35,1), (3,0.4,0), (4,0.8,1), (5,0.8,0); + +SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) +FROM labels_unordered; + +CREATE TABLE labels_ordered +( + idx Int64, + score Float64, + label Int64 +) +ENGINE = MergeTree +PRIMARY KEY idx +ORDER BY idx; + +INSERT INTO labels_ordered (idx,score,label) VALUES (1,0.1,0), (2,0.35,1), (3,0.4,0), (4,0.8,0), (5,0.8,1); + +SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) +FROM labels_ordered; \ No newline at end of file diff --git a/tests/queries/0_stateless/03199_has_lc_fixed_string.reference b/tests/queries/0_stateless/03199_has_lc_fixed_string.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/03199_has_lc_fixed_string.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/03199_has_lc_fixed_string.sql b/tests/queries/0_stateless/03199_has_lc_fixed_string.sql new file mode 100644 index 00000000000..3cb551804b7 --- /dev/null +++ b/tests/queries/0_stateless/03199_has_lc_fixed_string.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS 03199_fixedstring_array; +CREATE TABLE 03199_fixedstring_array (arr Array(LowCardinality(FixedString(8)))) ENGINE = Memory; +INSERT INTO 03199_fixedstring_array VALUES (['a', 'b']), (['c', 'd']); + +SELECT has(arr, toFixedString(materialize('a'), 1)) FROM 03199_fixedstring_array; + +DROP TABLE 03199_fixedstring_array; diff --git a/tests/queries/0_stateless/03199_join_with_materialized_column.reference b/tests/queries/0_stateless/03199_join_with_materialized_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_join_with_materialized_column.sql b/tests/queries/0_stateless/03199_join_with_materialized_column.sql new file mode 100644 index 00000000000..8c53c5b3e66 --- /dev/null +++ b/tests/queries/0_stateless/03199_join_with_materialized_column.sql @@ -0,0 +1,6 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS table_with_materialized; +CREATE TABLE table_with_materialized (col String MATERIALIZED 'A') ENGINE = Memory; +SELECT number FROM numbers(1) AS n, table_with_materialized; +DROP TABLE table_with_materialized; diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.reference b/tests/queries/0_stateless/03199_json_extract_dynamic.reference new file mode 100644 index 00000000000..759b7763cd1 --- /dev/null +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.reference @@ -0,0 +1,30 @@ +true Bool +42 Int64 +-42 Int64 +18446744073709551615 UInt64 +42.42 Float64 +42 Int64 +-42 Int64 +18446744073709551615 UInt64 +Hello String +2020-01-01 Date +2020-01-01 00:00:00.000000000 DateTime64(9) +[1,2,3] Array(Nullable(Int64)) +['str1','str2','str3'] Array(Nullable(String)) +[[[1],[2,3,4]],[[5,6],[7]]] Array(Array(Array(Nullable(Int64)))) +['2020-01-01 00:00:00.000000000','2020-01-01 00:00:00.000000000'] Array(Nullable(DateTime64(9))) +['2020-01-01','2020-01-01 date'] Array(Nullable(String)) +['2020-01-01','2020-01-01 00:00:00','str'] Array(Nullable(String)) +['2020-01-01','2020-01-01 00:00:00','42'] Array(Nullable(String)) +['str','42'] Array(Nullable(String)) +[42,42.42] Array(Nullable(Float64)) +[42,18446744073709552000,42.42] Array(Nullable(Float64)) +[42,42.42] Array(Nullable(Float64)) +[NULL,NULL] Array(Nullable(String)) +[NULL,42] Array(Nullable(Int64)) +[[NULL],[],[42]] Array(Array(Nullable(Int64))) +[[],[NULL,NULL],[1,NULL,3],[NULL,2,NULL]] Array(Array(Nullable(Int64))) +[[],[NULL,NULL],['1',NULL,'3'],[NULL,'2',NULL],['2020-01-01']] Array(Array(Nullable(String))) +('str',42,[42]) Tuple(Nullable(String), Nullable(Int64), Array(Nullable(Int64))) +[42,18446744073709551615] Array(Nullable(UInt64)) +(-42,18446744073709551615) Tuple(Nullable(Int64), Nullable(UInt64)) diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.sql b/tests/queries/0_stateless/03199_json_extract_dynamic.sql new file mode 100644 index 00000000000..286949f4d3e --- /dev/null +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.sql @@ -0,0 +1,37 @@ +set input_format_json_try_infer_numbers_from_strings=1; + +select JSONExtract(materialize('{"d" : true}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : -42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 18446744073709551615}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 42.42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "42"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "-42"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "18446744073709551615"}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : "Hello"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "2020-01-01"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "2020-01-01 00:00:00.000"}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : [1, 2, 3]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["str1", "str2", "str3"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [[[1], [2, 3, 4]], [[5, 6], [7]]]}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 date"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00", "str"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00", "42"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["str", "42"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 18446744073709551615, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : [null, null]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [null, 42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [[null], [], [42]]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"a" : [[], [null, null], ["1", null, "3"], [null, "2", null]]}'), 'a', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"a" : [[], [null, null], ["1", null, "3"], [null, "2", null], ["2020-01-01"]]}'), 'a', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : ["str", 42, [42]]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 18446744073709551615]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [-42, 18446744073709551615]}'), 'd', 'Dynamic') as d, dynamicType(d); diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.reference b/tests/queries/0_stateless/03199_merge_filters_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.sql b/tests/queries/0_stateless/03199_merge_filters_bug.sql new file mode 100644 index 00000000000..ed2ec2ea217 --- /dev/null +++ b/tests/queries/0_stateless/03199_merge_filters_bug.sql @@ -0,0 +1,70 @@ +drop table if exists t1; +drop table if exists t2; + +CREATE TABLE t1 +( + `s1` String, + `s2` String, + `s3` String +) +ENGINE = MergeTree +ORDER BY tuple(); + + +CREATE TABLE t2 +( + `fs1` FixedString(10), + `fs2` FixedString(10) +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO t1 SELECT + repeat('t', 15) s1, + 'test' s2, + 'test' s3; + +INSERT INTO t1 SELECT + substring(s1, 1, 10), + s2, + s3 +FROM generateRandom('s1 String, s2 String, s3 String') +LIMIT 10000; + +INSERT INTO t2 SELECT * +FROM generateRandom() +LIMIT 10000; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0; + +optimize table t1 final; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')); diff --git a/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference b/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference new file mode 100644 index 00000000000..10ce589000d --- /dev/null +++ b/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference @@ -0,0 +1,27 @@ +5 (4230072075578472911,4230072075578472911) 71789584853496063 +2 (4401188181514187637,4401188181514187637) 878466845199253299 +4 (4940826638032106783,4940826638032106783) 3675164899122807807 +6 (10957420562507184961,10957420562507184961) 3732623117916254211 +0 (797076400500506358,797076400500506358) 3746094338409299772 +7 (10843611042193511775,10843611042193511775) 4607251742847087615 +3 (12588286986351526898,12588286986351526898) 13889114719560662796 +8 (452995860660674674,452995860660674674) 17365664920787500812 +9 (12206106972241516904,12206106972241516904) 17567684527097330880 +1 (14558425114501132193,14558425114501132193) 18445898820068822019 +3 255 255 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 +1 +2 +3 +4 +5 +6 diff --git a/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql b/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql new file mode 100644 index 00000000000..c32d7524492 --- /dev/null +++ b/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql @@ -0,0 +1,41 @@ +SET allow_experimental_analyzer=1; + +SELECT *, ngramMinHash(*) AS minhash, mortonEncode(untuple(ngramMinHash(*))) AS z +FROM (SELECT toString(number) FROM numbers(10)) +ORDER BY z LIMIT 100; + +CREATE TABLE test ( + idx UInt64, + coverage Array(UInt64), + test_name String +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO test VALUES (10, [0,1,2,3], 'xx'), (20, [3,4,5,6], 'xxx'), (90, [3,4,5,6,9], 'xxxx'); + +WITH + 4096 AS w, 4096 AS h, w * h AS pixels, + arrayJoin(coverage) AS num, + num DIV (32768 * 32768 DIV pixels) AS idx, + mortonDecode(2, idx) AS coord, + 255 AS b, + least(255, uniq(test_name)) AS r, + 255 * uniq(test_name) / (max(uniq(test_name)) OVER ()) AS g +SELECT r::UInt8, g::UInt8, b::UInt8 +FROM test +GROUP BY coord +ORDER BY coord.2 * w + coord.1 +WITH FILL FROM 0 TO 10; + + +CREATE TABLE seq ( + number UInt64 +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO seq VALUES (0), (6), (7); + +WITH (Select min(number), max(number) from seq) as range Select * from numbers(range.1, range.2); + diff --git a/tests/queries/0_stateless/03199_unbin_buffer_overflow.reference b/tests/queries/0_stateless/03199_unbin_buffer_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh b/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh new file mode 100755 index 00000000000..337debebb14 --- /dev/null +++ b/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +# check for buffer overflow in unbin (due to not enough memory preallocate for output buffer) +# we iterate over all remainders of input string length modulo word_size and check that no assertions are triggered + +word_size=8 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=8 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=2 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=2 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL" +done diff --git a/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.reference b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.sql b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.sql new file mode 100644 index 00000000000..2dd0a37657d --- /dev/null +++ b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_subcolumns_join; + +CREATE TABLE t_subcolumns_join (id UInt64) ENGINE=MergeTree ORDER BY tuple(); + +INSERT INTO t_subcolumns_join SELECT number as number FROM numbers(10000); + +SELECT + count() +FROM (SELECT number FROM numbers(10)) as tbl LEFT JOIN t_subcolumns_join ON number = id +WHERE id is null +SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 1, join_use_nulls = 1; + +DROP TABLE t_subcolumns_join; diff --git a/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.reference b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.reference new file mode 100644 index 00000000000..912bff45da5 --- /dev/null +++ b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.reference @@ -0,0 +1,11 @@ +str_array Array(String) +1318 +5779 +1715 +6422 +5875 +1887 +3763 +4245 +4270 +758 diff --git a/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.sh b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.sh new file mode 100755 index 00000000000..dcecd7b3bea --- /dev/null +++ b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +DATA_DIR=$CUR_DIR/data_avro + +# See https://github.com/ClickHouse/ClickHouse/issues/60438 +$CLICKHOUSE_LOCAL -q "DESC file('$DATA_DIR/negative_block_size_arrays.avro')" +$CLICKHOUSE_LOCAL -q "SELECT arraySum(arrayMap(x -> length(x), str_array)) AS res FROM file('$DATA_DIR/negative_block_size_arrays.avro')" diff --git a/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference new file mode 100644 index 00000000000..62f5eb45106 --- /dev/null +++ b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference @@ -0,0 +1,24 @@ +QUERY id: 0 + PROJECTION COLUMNS + (sumIf(toInt64(1), 1)) Tuple(Int64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: tuple, function_type: ordinary, result_type: Tuple(Int64) + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: sumIf, function_type: aggregate, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Int64_1, constant_value_type: Int64 + EXPRESSION + FUNCTION id: 7, function_name: toInt64, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8 + SETTINGS optimize_rewrite_sum_if_to_count_if=1 diff --git a/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql new file mode 100644 index 00000000000..24369fd6497 --- /dev/null +++ b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql @@ -0,0 +1,2 @@ +SET allow_experimental_analyzer = 1; +EXPLAIN QUERY TREE SELECT tuple(sumIf(toInt64(1), 1)) FROM numbers(100) settings optimize_rewrite_sum_if_to_count_if=1; diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference new file mode 100644 index 00000000000..8565fe3d0fa --- /dev/null +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference @@ -0,0 +1,402 @@ +Memory +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 +MergeTree compact +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 +MergeTree wide +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh new file mode 100755 index 00000000000..8231691e184 --- /dev/null +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1" + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(36)" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id" + $CH_CLIENT -q "select id from test where v.UInt64 is null order by id" + + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(1000000) settings min_insert_block_size_rows=100000" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id format Null" + $CH_CLIENT -q "select id from test where v.UInt64 is null order by id format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference new file mode 100644 index 00000000000..8740726c7ef --- /dev/null +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference @@ -0,0 +1,57 @@ +Memory +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +MergeTree compact +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +MergeTree wide +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh new file mode 100755 index 00000000000..aa06e48376c --- /dev/null +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, number from numbers(10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(10, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(20, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, NULL from numbers(30, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(40, 40) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(10, 10) settings min_insert_block_size_rows=50000" + + $CH_CLIENT -q "select distinct dynamicType(d) as type from test order by type" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'UInt64'" + $CH_CLIENT -q "select count() from test where d.UInt64 is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'String'" + $CH_CLIENT -q "select count() from test where d.String is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Date'" + $CH_CLIENT -q "select count() from test where d.Date is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Variant(String, UInt64))\`)" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Array(Dynamic))\`)" + $CH_CLIENT -q "select count() from test where d is NULL" + $CH_CLIENT -q "select count() from test where not empty(d.\`Tuple(a Array(Dynamic))\`.a.String)" + + $CH_CLIENT -q "select d, d.UInt64.null, d.String.null, d.\`Array(Variant(String, UInt64))\`.null from test format Null" + $CH_CLIENT -q "select d.UInt64.null, d.String.null, d.\`Array(Variant(String, UInt64))\`.null from test format Null" + $CH_CLIENT -q "select d.Int8.null, d.Date.null, d.\`Array(String)\`.null from test format Null" + $CH_CLIENT -q "select d, d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.String.null from test format Null" + $CH_CLIENT -q "select d, d.\`Tuple(a UInt64, b String)\`.a, d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64.null, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/data_avro/negative_block_size_arrays.avro b/tests/queries/0_stateless/data_avro/negative_block_size_arrays.avro new file mode 100644 index 00000000000..ec785a885dc Binary files /dev/null and b/tests/queries/0_stateless/data_avro/negative_block_size_arrays.avro differ diff --git a/tests/queries/0_stateless/data_orc/test_reader_time_zone.snappy.orc b/tests/queries/0_stateless/data_orc/test_reader_time_zone.snappy.orc new file mode 100644 index 00000000000..ab1b785dbbf Binary files /dev/null and b/tests/queries/0_stateless/data_orc/test_reader_time_zone.snappy.orc differ diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 614bfcece8f..ef2d89f0218 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -54,9 +54,17 @@ export CLICKHOUSE_OBFUSCATOR=${CLICKHOUSE_OBFUSCATOR:="${CLICKHOUSE_BINARY}-obfu export CLICKHOUSE_COMPRESSOR=${CLICKHOUSE_COMPRESSOR:="${CLICKHOUSE_BINARY}-compressor"} export CLICKHOUSE_GIT_IMPORT=${CLICKHOUSE_GIT_IMPORT="${CLICKHOUSE_BINARY}-git-import"} +export CLICKHOUSE_CONFIG_DIR=${CLICKHOUSE_CONFIG_DIR:="/etc/clickhouse-server"} export CLICKHOUSE_CONFIG=${CLICKHOUSE_CONFIG:="/etc/clickhouse-server/config.xml"} export CLICKHOUSE_CONFIG_CLIENT=${CLICKHOUSE_CONFIG_CLIENT:="/etc/clickhouse-client/config.xml"} +export CLICKHOUSE_USER_FILES=${CLICKHOUSE_USER_FILES:="/var/lib/clickhouse/user_files"} +export CLICKHOUSE_USER_FILES_UNIQUE=${CLICKHOUSE_USER_FILES_UNIQUE:="${CLICKHOUSE_USER_FILES}/${CLICKHOUSE_TEST_UNIQUE_NAME}"} +# synonym +export USER_FILES_PATH=$CLICKHOUSE_USER_FILES + +export CLICKHOUSE_SCHEMA_FILES=${CLICKHOUSE_SCHEMA_FILES:="/var/lib/clickhouse/format_schemas"} + [ -x "${CLICKHOUSE_BINARY}-extract-from-config" ] && CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="$CLICKHOUSE_BINARY-extract-from-config --config=$CLICKHOUSE_CONFIG"} [ -x "${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="$CLICKHOUSE_BINARY extract-from-config --config=$CLICKHOUSE_CONFIG"} export CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="$CLICKHOUSE_BINARY-extract-from-config --config=$CLICKHOUSE_CONFIG"} diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index fa2bfef935a..ca2c4ec4192 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2758 +personal_ws-1.1 en 2942 AArch ACLs ALTERs @@ -449,7 +449,7 @@ Kahan Kaser KeeperAliveConnections KeeperMap -KeeperOutstandingRequets +KeeperOutstandingRequests Kerberos Khanna KittenHouse @@ -1098,6 +1098,8 @@ aggregatefunction aggregatingmergetree aggregatio aggretate +aggthrow +aggThrow aiochclient allocator alphaTokens @@ -1656,9 +1658,9 @@ fsync func fuzzBits fuzzJSON +fuzzQuery fuzzer fuzzers -fuzzQuery gRPC gccMurmurHash gcem @@ -1898,11 +1900,13 @@ kurtosis kurtpop kurtsamp laion +lagInFrame lang laravel largestTriangleThreeBuckets latencies ldap +leadInFrame leftPad leftPadUTF leftUTF @@ -1980,6 +1984,8 @@ mapExtractKeyLike mapFilter mapFromArrays mapKeys +mapPartialReverseSort +mapPartialSort mapPopulateSeries mapReverseSort mapSort @@ -1996,6 +2002,7 @@ maxMap maxintersections maxintersectionsposition maxmap +minMappedArrays maxmind mdadm meanZTest @@ -2013,6 +2020,7 @@ metrica metroHash mfedotov minMap +minMappedArrays minSampleSizeContinuous minSampleSizeConversion mindsdb @@ -2120,8 +2128,10 @@ noaa nonNegativeDerivative noop normalizeQuery +normalizeQueryKeepNames normalizeUTF normalizedQueryHash +normalizedQueryHashKeepNames notEmpty notEquals notILike @@ -2795,6 +2805,7 @@ tupleModulo tupleModuloByNumber tupleMultiply tupleMultiplyByNumber +tupleNames tupleNegate tuplePlus tupleToNameValuePairs diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8112ed9083b..271065a78fb 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 @@ -6,6 +7,7 @@ v24.5.1.1763-stable 2024-06-01 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.5.46-lts 2024-07-03 v24.3.4.147-lts 2024-06-13 v24.3.3.102-lts 2024-05-01 v24.3.2.23-lts 2024-04-03